archive revision 290152
1#------------------------------------------------------------------------------
2# $File: archive,v 1.91 2015/09/16 13:49:33 christos Exp $
3# archive:  file(1) magic for archive formats (see also "msdos" for self-
4#           extracting compressed archives)
5#
6# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
7# pre-POSIX "tar" archives are handled in the C code.
8
9# POSIX tar archives
10257	string		ustar\0		POSIX tar archive
11!:mime	application/x-tar # encoding: posix
12257	string		ustar\040\040\0	GNU tar archive
13!:mime	application/x-tar # encoding: gnu
14
15# Incremental snapshot gnu-tar format from:
16# http://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html
170	string		GNU\ tar-	GNU tar incremental snapshot data
18>&0	regex		[0-9]\.[0-9]+-[0-9]+	version %s
19
20# cpio archives
21#
22# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
23# The idea is to indicate archives produced on machines with the same
24# byte order as the machine running "file" with "cpio archive", and
25# to indicate archives produced on machines with the opposite byte order
26# from the machine running "file" with "byte-swapped cpio archive".
27#
28# The SVR4 "cpio(4)" hints that there are additional formats, but they
29# are defined as "short"s; I think all the new formats are
30# character-header formats and thus are strings, not numbers.
310	short		070707		cpio archive
32!:mime	application/x-cpio
330	short		0143561		byte-swapped cpio archive
34!:mime	application/x-cpio # encoding: swapped
350	string		070707		ASCII cpio archive (pre-SVR4 or odc)
360	string		070701		ASCII cpio archive (SVR4 with no CRC)
370	string		070702		ASCII cpio archive (SVR4 with CRC)
38
39#
40# Various archive formats used by various versions of the "ar"
41# command.
42#
43
44#
45# Original UNIX archive formats.
46# They were written with binary values in host byte order, and
47# the magic number was a host "int", which might have been 16 bits
48# or 32 bits.  We don't say "PDP-11" or "VAX", as there might have
49# been ports to little-endian 16-bit-int or 32-bit-int platforms
50# (x86?) using some of those formats; if none existed, feel free
51# to use "PDP-11" for little-endian 16-bit and "VAX" for little-endian
52# 32-bit.  There might have been big-endian ports of that sort as
53# well.
54#
550	leshort		0177555		very old 16-bit-int little-endian archive
560	beshort		0177555		very old 16-bit-int big-endian archive
570	lelong		0177555		very old 32-bit-int little-endian archive
580	belong		0177555		very old 32-bit-int big-endian archive
59
600	leshort		0177545		old 16-bit-int little-endian archive
61>2	string		__.SYMDEF	random library
620	beshort		0177545		old 16-bit-int big-endian archive
63>2	string		__.SYMDEF	random library
640	lelong		0177545		old 32-bit-int little-endian archive
65>4	string		__.SYMDEF	random library
660	belong		0177545		old 32-bit-int big-endian archive
67>4	string		__.SYMDEF	random library
68
69#
70# From "pdp" (but why a 4-byte quantity?)
71#
720	lelong		0x39bed		PDP-11 old archive
730	lelong		0x39bee		PDP-11 4.0 archive
74
75#
76# XXX - what flavor of APL used this, and was it a variant of
77# some ar archive format?  It's similar to, but not the same
78# as, the APL workspace magic numbers in pdp.
79#
800	long		0100554		apl workspace
81
82#
83# System V Release 1 portable(?) archive format.
84#
850	string		=<ar>		System V Release 1 ar archive
86!:mime	application/x-archive
87
88#
89# Debian package; it's in the portable archive format, and needs to go
90# before the entry for regular portable archives, as it's recognized as
91# a portable archive whose first member has a name beginning with
92# "debian".
93#
940	string		=!<arch>\ndebian
95>8	string		debian-split	part of multipart Debian package
96!:mime	application/vnd.debian.binary-package
97>8	string		debian-binary	Debian binary package
98!:mime	application/vnd.debian.binary-package
99>8	string		!debian
100>68	string		>\0		(format %s)
101# These next two lines do not work, because a bzip2 Debian archive
102# still uses gzip for the control.tar (first in the archive).  Only
103# data.tar varies, and the location of its filename varies too.
104# file/libmagic does not currently have support for ASCII-string-based
105# offsets (as of 2005-09-15).
106#>81	string		bz2		\b, uses bzip2 compression
107#>84	string		gz		\b, uses gzip compression
108#>136	ledate		x		created: %s
109
110#
111# MIPS archive; they're in the portable archive format, and need to go
112# before the entry for regular portable archives, as it's recognized as
113# a portable archive whose first member has a name beginning with
114# "__________E".
115#
1160	string	=!<arch>\n__________E	MIPS archive
117!:mime	application/x-archive
118>20	string	U			with MIPS Ucode members
119>21	string	L			with MIPSEL members
120>21	string	B			with MIPSEB members
121>19	string	L			and an EL hash table
122>19	string	B			and an EB hash table
123>22	string	X			-- out of date
124
1250	search/1	-h-		Software Tools format archive text
126
127#
128# BSD/SVR2-and-later portable archive formats.
129#
1300	string		=!<arch>		current ar archive
131!:mime	application/x-archive
132>8	string		__.SYMDEF	random library
133>68	string		__.SYMDEF\ SORTED	random library
134
135#
136# "Thin" archive, as can be produced by GNU ar.
137#
1380	string		=!<thin>\n	thin archive with
139>68	belong		0		no symbol entries
140>68	belong		1		%d symbol entry
141>68	belong		>1		%d symbol entries
142
143# ARC archiver, from Daniel Quinlan (quinlan@yggdrasil.com)
144#
145# The first byte is the magic (0x1a), byte 2 is the compression type for
146# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
147# filename of the first file (null terminated).  Since some types collide
148# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
149# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%).  0x01 collides with terminfo.
1500	lelong&0x8080ffff	0x0000081a	ARC archive data, dynamic LZW
151!:mime	application/x-arc
1520	lelong&0x8080ffff	0x0000091a	ARC archive data, squashed
153!:mime	application/x-arc
1540	lelong&0x8080ffff	0x0000021a	ARC archive data, uncompressed
155!:mime	application/x-arc
1560	lelong&0x8080ffff	0x0000031a	ARC archive data, packed
157!:mime	application/x-arc
1580	lelong&0x8080ffff	0x0000041a	ARC archive data, squeezed
159!:mime	application/x-arc
1600	lelong&0x8080ffff	0x0000061a	ARC archive data, crunched
161!:mime	application/x-arc
162# [JW] stuff taken from idarc, obviously ARC successors:
1630	lelong&0x8080ffff	0x00000a1a	PAK archive data
164!:mime	application/x-arc
1650	lelong&0x8080ffff	0x0000141a	ARC+ archive data
166!:mime	application/x-arc
1670	lelong&0x8080ffff	0x0000481a	HYP archive data
168!:mime	application/x-arc
169
170# Acorn archive formats (Disaster prone simpleton, m91dps@ecs.ox.ac.uk)
171# I can't create either SPARK or ArcFS archives so I have not tested this stuff
172# [GRR:  the original entries collide with ARC, above; replaced with combined
173#  version (not tested)]
174#0	byte		0x1a		RISC OS archive (spark format)
1750	string		\032archive	RISC OS archive (ArcFS format)
1760       string          Archive\000     RISC OS archive (ArcFS format)
177
178# All these were taken from idarc, many could not be verified. Unfortunately,
179# there were many low-quality sigs, i.e. easy to trigger false positives.
180# Please notify me of any real-world fishy/ambiguous signatures and I'll try
181# to get my hands on the actual archiver and see if I find something better. [JW]
182# probably many can be enhanced by finding some 0-byte or control char near the start
183
184# idarc calls this Crush/Uncompressed... *shrug*
1850	string	CRUSH Crush archive data
186# Squeeze It (.sqz)
1870	string	HLSQZ Squeeze It archive data
188# SQWEZ
1890	string	SQWEZ SQWEZ archive data
190# HPack (.hpk)
1910	string	HPAK HPack archive data
192# HAP
1930	string	\x91\x33HF HAP archive data
194# MD/MDCD
1950	string	MDmd MDCD archive data
196# LIM
1970	string	LIM\x1a LIM archive data
198# SAR
1993	string	LH5 SAR archive data
200# BSArc/BS2
2010	string	\212\3SB\020\0	BSArc/BS2 archive data
202# Bethesda Softworks Archive (Oblivion)
2030	string	BSA\0 		BSArc archive data
204>4	lelong	x		version %d
205# MAR
2062	string	=-ah MAR archive data
207# ACB
208#0	belong&0x00f800ff	0x00800000 ACB archive data
209# CPZ
210# TODO, this is what idarc says: 0	string	\0\0\0 CPZ archive data
211# JRC
2120	string	JRchive JRC archive data
213# Quantum
2140	string	DS\0 Quantum archive data
215# ReSOF
2160	string	PK\3\6 ReSOF archive data
217# QuArk
2180	string	7\4 QuArk archive data
219# YAC
22014	string	YC YAC archive data
221# X1
2220	string	X1 X1 archive data
2230	string	XhDr X1 archive data
224# CDC Codec (.dqt)
2250	belong&0xffffe000	0x76ff2000 CDC Codec archive data
226# AMGC
2270	string	\xad6" AMGC archive data
228# NuLIB
2290	string	N\xc3\xb5F\xc3\xa9lx\xc3\xa5 NuLIB archive data
230# PakLeo
2310	string	LEOLZW PAKLeo archive data
232# ChArc
2330	string	SChF ChArc archive data
234# PSA
2350	string	PSA PSA archive data
236# CrossePAC
2370	string	DSIGDCC CrossePAC archive data
238# Freeze
2390	string	\x1f\x9f\x4a\x10\x0a Freeze archive data
240# KBoom
2410	string	\xc2\xa8MP\xc2\xa8 KBoom archive data
242# NSQ, must go after CDC Codec
2430	string	\x76\xff NSQ archive data
244# DPA
2450	string	Dirk\ Paehl DPA archive data
246# BA
247# TODO: idarc says "bytes 0-2 == bytes 3-5"
248# TTComp
2490	string	\0\6 TTComp archive data
250# ESP, could this conflict with Easy Software Products' (e.g. ESP Ghostscript) documentation?
2510	string	ESP ESP archive data
252# ZPack
2530	string	\1ZPK\1 ZPack archive data
254# Sky
2550	string	\xbc\x40 Sky archive data
256# UFA
2570	string	UFA UFA archive data
258# Dry
2590	string	=-H2O DRY archive data
260# FoxSQZ
2610	string	FOXSQZ FoxSQZ archive data
262# AR7
2630	string	,AR7 AR7 archive data
264# PPMZ
2650	string	PPMZ PPMZ archive data
266# MS Compress
# The base description is printed once by the top-level test; the nested
# KWAJ tests below only append details (each message starts with "\b").
2674	string	\x88\xf0\x27 MS Compress archive data
268# updated by Joerg Jenderek
269>9	string	\0
270>>0	string	KWAJ
# This line used to repeat "MS Compress archive data", so the name was
# printed twice for KWAJ files.
271>>>7	string	\321\003	\b, KWAJ variant
# Explicitly little-endian (assumes KWAJ header fields are stored in
# little-endian order -- confirm against the libmspack KWAJ notes); a plain
# "ulong" is read in the byte order of the host running file(1).
272>>>>14	ulelong	>0		\b, original size: %u bytes
273>>>>18		ubyte	>0x65
274>>>>>18		string	x       \b, was %.8s
275>>>>>(10.b-4)	string	x       \b.%.3s
276# MP3 (archiver, not lossy audio compression)
2770	string	MP3\x1a MP3-Archiver archive data
278# ZET
2790	string	OZ\xc3\x9d ZET archive data
280# TSComp
2810	string	\x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive data
282# ARQ
2830	string	gW\4\1 ARQ archive data
284# Squash
2853	string	OctSqu Squash archive data
286# Terse
2870	string	\5\1\1\0 Terse archive data
288# PUCrunch
2890	string	\x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data
290# UHarc
2910	string	UHA UHarc archive data
292# ABComp
2930	string	\2AB ABComp archive data
2940	string	\3AB2 ABComp archive data
295# CMP
2960	string	CO\0 CMP archive data
297# Splint
2980	string	\x93\xb9\x06 Splint archive data
299# InstallShield
3000	string	\x13\x5d\x65\x8c InstallShield Z archive Data
301# Gather
3021	string	GTH Gather archive data
303# BOA
3040	string	BOA BOA archive data
305# RAX
3060	string	ULEB\xa RAX archive data
307# Xtreme
3080	string	ULEB\0 Xtreme archive data
309# Pack Magic
3100	string	@\xc3\xa2\1\0 Pack Magic archive data
311# BTS
3120	belong&0xfeffffff	0x1a034465 BTS archive data
313# ELI 5750
3140	string	Ora\  ELI 5750 archive data
315# QFC
3160	string	\x1aFC\x1a QFC archive data
3170	string	\x1aQF\x1a QFC archive data
318# PRO-PACK
3190	string	RNC PRO-PACK archive data
320# 777
3210	string	777 777 archive data
322# LZS221
3230	string	sTaC LZS221 archive data
324# HPA
3250	string	HPA HPA archive data
326# Arhangel
3270	string	LG Arhangel archive data
328# EXP1, uses bzip2
3290	string	0123456789012345BZh EXP1 archive data
330# IMP
3310	string	IMP\xa IMP archive data
332# NRV
3330	string	\x00\x9E\x6E\x72\x76\xFF NRV archive data
334# Squish
3350	string	\x73\xb2\x90\xf4 Squish archive data
336# Par
3370	string	PHILIPP Par archive data
3380	string	PAR Par archive data
339# HIT
3400	string	UB HIT archive data
341# SBX
3420	belong&0xfffff000	0x53423000 SBX archive data
343# NaShrink
3440	string	NSK NaShrink archive data
345# SAPCAR
3460	string	#\ CAR\ archive\ header SAPCAR archive data
3470	string	CAR\ 2.00RG SAPCAR archive data
348# Disintegrator
3490	string	DST Disintegrator archive data
350# ASD
3510	string	ASD ASD archive data
352# InstallShield CAB
3530	string	ISc( InstallShield CAB
354# TOP4
3550	string	T4\x1a TOP4 archive data
356# BatComp left out: sig looks like COM executable
357# so TODO: get real 4dos batcomp file and find sig
358# BlakHole
3590	string	BH\5\7 BlakHole archive data
360# BIX
3610	string	BIX0 BIX archive data
362# ChiefLZA
3630	string	ChfLZ ChiefLZA archive data
364# Blink
3650	string	Blink Blink archive data
366# Logitech Compress
3670	string	\xda\xfa Logitech Compress archive data
368# ARS-Sfx (FIXME: really a SFX? then goto COM/EXE)
3691	string	(C)\ STEPANYUK ARS-Sfx archive data
370# AKT/AKT32
3710	string	AKT32 AKT32 archive data
3720	string	AKT AKT archive data
373# NPack
3740	string	MSTSM NPack archive data
375# PFT
3760	string	\0\x50\0\x14 PFT archive data
377# SemOne
3780	string	SEM SemOne archive data
379# PPMD
3800	string	\x8f\xaf\xac\x84 PPMD archive data
381# FIZ
3820	string	FIZ FIZ archive data
383# MSXiE
3840	belong&0xfffff0f0	0x4d530000 MSXiE archive data
385# DeepFreezer
3860	belong&0xfffffff0	0x797a3030 DeepFreezer archive data
387# DC
3880	string	=<DC- DC archive data
389# TPac
3900	string	\4TPAC\3 TPac archive data
391# Ai
3920	string	Ai\1\1\0 Ai archive data
3930	string	Ai\1\0\0 Ai archive data
394# Ai32
3950	string	Ai\2\0 Ai32 archive data
3960	string	Ai\2\1 Ai32 archive data
397# SBC
3980	string	SBC SBC archive data
399# Ybs
4000	string	YBS Ybs archive data
401# DitPack
4020	string	\x9e\0\0 DitPack archive data
403# DMS
4040	string	DMS! DMS archive data
405# EPC
4060	string	\x8f\xaf\xac\x8c EPC archive data
407# VSARC
4080	string	VS\x1a VSARC archive data
409# PDZ
4100	string	PDZ PDZ archive data
411# ReDuq
4120	string	rdqx ReDuq archive data
413# GCA
4140	string	GCAX GCA archive data
415# PPMN
4160	string	pN PPMN archive data
417# WinImage
4183	string	WINIMAGE WinImage archive data
419# Compressia
4200	string	CMP0CMP Compressia archive data
421# UHBC
4220	string	UHB UHBC archive data
423# WinHKI
4240	string	\x61\x5C\x04\x05 WinHKI archive data
425# WWPack data file
4260	string	WWP WWPack archive data
427# BSN (BSA, PTS-DOS)
4280	string	\xffBSG BSN archive data
4291	string	\xffBSG BSN archive data
4303	string	\xffBSG BSN archive data
4311	string	\0\xae\2 BSN archive data
4321	string	\0\xae\3 BSN archive data
4331	string	\0\xae\7 BSN archive data
434# AIN
4350	string	\x33\x18 AIN archive data
4360	string	\x33\x17 AIN archive data
437# XPA32 test moved and merged with XPA by Joerg Jenderek at Sep 2015
438# SZip (TODO: doesn't catch all versions)
4390	string	SZ\x0a\4 SZip archive data
440# XPack DiskImage
441# *.XDI updated by Joerg Jenderek Sep 2015
442# ftp://ftp.sac.sk/pub/sac/pack/0index.txt 
443# GRR: this test is still too general, as it also catches text files starting with "jm"
4440	string	jm	
445# only found examples with these 2 additional characteristic bytes
446>2	string	\x2\x4	Xpack DiskImage archive data
447#!:ext xdi
448# XPack Data
449# *.xpa updated by Joerg Jenderek Sep 2015
450# ftp://ftp.elf.stuba.sk/pub/pc/pack/
4510	string	xpa	XPA
452!:ext	xpa
453# XPA32
454# ftp://ftp.elf.stuba.sk/pub/pc/pack/xpa32.zip
455# created by XPA32.EXE version 1.0.2 for Windows
456>0	string	xpa\0\1 \b32 archive data
457# created by XPACK.COM version 1.67m or 1.67r with short 0x1800 
458>3	ubeshort	!0x0001	\bck archive data
459# XPack Single Data
460# changed by Joerg Jenderek Sep 2015 back to like in version 5.12
461# letter 'I'+ acute accent is equivalent to \xcd
4620	string	\xcd\ jm	Xpack single archive data
463#!:mime	application/x-xpa-compressed
464!:ext xpa
465
466# TODO: missing due to unknown magic/magic at end of file:
467#DWC
468#ARG
469#ZAR
470#PC/3270
471#InstallIt
472#RKive
473#RK
474#XPack Diskimage
475
476# These were inspired by idarc, but actually verified
477# Dzip archiver (.dz)
4780	string	DZ Dzip archive data
479>2	byte	x \b, version %i
480>3	byte	x \b.%i
481# ZZip archiver (.zz)
4820	string	ZZ\ \0\0 ZZip archive data
4830	string	ZZ0 ZZip archive data
484# PAQ archiver (.paq)
4850	string	\xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data
4860	string	PAQ PAQ archive data
487>3	byte&0xf0	0x30
488>>3	byte	x (v%c)
489# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP)
4900xe	string	\x1aJar\x1b JAR (ARJ Software, Inc.) archive data
4910	string	JARCS JAR (ARJ Software, Inc.) archive data
492
493# ARJ archiver (jason@jarthur.Claremont.EDU)
4940	leshort		0xea60		ARJ archive data
495!:mime	application/x-arj
496>5	byte		x		\b, v%d,
497>8	byte		&0x04		multi-volume,
498>8	byte		&0x10		slash-switched,
499>8	byte		&0x20		backup,
500>34	string		x		original name: %s,
501>7	byte		0		os: MS-DOS
502>7	byte		1		os: PRIMOS
503>7	byte		2		os: Unix
504>7	byte		3		os: Amiga
505>7	byte		4		os: Macintosh
506>7	byte		5		os: OS/2
507>7	byte		6		os: Apple ][ GS
508>7	byte		7		os: Atari ST
509>7	byte		8		os: NeXT
510>7	byte		9		os: VAX/VMS
511>3	byte		>0		%d]
512# [JW] idarc says this is also possible
5132	leshort		0xea60		ARJ archive data
514
515# HA archiver (Greg Roelofs, newt@uchicago.edu)
516# This is a really bad format. A file containing HAWAII will match this...
517#0	string		HA		HA archive data,
518#>2	leshort		=1		1 file,
519#>2	leshort		>1		%hu files,
520#>4	byte&0x0f	=0		first is type CPY
521#>4	byte&0x0f	=1		first is type ASC
522#>4	byte&0x0f	=2		first is type HSC
523#>4	byte&0x0f	=0x0e		first is type DIR
524#>4	byte&0x0f	=0x0f		first is type SPECIAL
525# suggestion: at least identify small archives (<1024 files)
5260  belong&0xffff00fc 0x48410000 HA archive data
527>2	leshort		=1		1 file,
528>2	leshort		>1		%u files,
529>4	byte&0x0f	=0		first is type CPY
530>4	byte&0x0f	=1		first is type ASC
531>4	byte&0x0f	=2		first is type HSC
532>4	byte&0x0f	=0x0e		first is type DIR
533>4	byte&0x0f	=0x0f		first is type SPECIAL
534
535# HPACK archiver (Peter Gutmann, pgut1@cs.aukuni.ac.nz)
# NOTE(review): the identical test "0 string HPAK" also appears in the
# idarc-derived list earlier in this file (described there as "HPack archive
# data"); one of the two entries looks redundant -- confirm which description
# should be kept.
5360	string		HPAK		HPACK archive data
537
538# JAM Archive volume format, by Dmitry.Kohmanyuk@UA.net
5390	string		\351,\001JAM\ 		JAM archive,
540>7	string		>\0			version %.4s
541>0x26	byte		=0x27			-
542>>0x2b	string          >\0			label %.11s,
543>>0x27	lelong		x			serial %08x,
544>>0x36	string		>\0			fstype %.8s
545
546# LHARC/LHA archiver (Greg Roelofs, newt@uchicago.edu)
5472	string		-lh0-		LHarc 1.x/ARX archive data [lh0]
548!:mime	application/x-lharc
5492	string		-lh1-		LHarc 1.x/ARX archive data [lh1]
550!:mime	application/x-lharc
5512	string		-lz4-		LHarc 1.x archive data [lz4]
552!:mime	application/x-lharc
5532	string		-lz5-		LHarc 1.x archive data [lz5]
554!:mime	application/x-lharc
555#	[never seen any but the last; -lh4- reported in comp.compression:]
5562	string		-lzs-		LHa/LZS archive data [lzs]
557!:mime	application/x-lha
5582	string		-lh\40-		LHa 2.x? archive data [lh ]
559!:mime	application/x-lha
5602	string		-lhd-		LHa 2.x? archive data [lhd]
561!:mime	application/x-lha
5622	string		-lh2-		LHa 2.x? archive data [lh2]
563!:mime	application/x-lha
5642	string		-lh3-		LHa 2.x? archive data [lh3]
565!:mime	application/x-lha
5662	string		-lh4-		LHa (2.x) archive data [lh4]
567!:mime	application/x-lha
5682	string		-lh5-		LHa (2.x) archive data [lh5]
569!:mime	application/x-lha
5702	string		-lh6-		LHa (2.x) archive data [lh6]
571!:mime	application/x-lha
5722	string		-lh7-		LHa (2.x)/LHark archive data [lh7]
573!:mime	application/x-lha
574>20	byte		x		- header level %d
575# taken from idarc [JW]
5762   string      -lZ         PUT archive data
5772   string      -lz         LZS archive data
5782   string      -sw1-       Swag archive data
579
580# RAR archiver (Greg Roelofs, newt@uchicago.edu)
5810	string		Rar!		RAR archive data,
582!:mime	application/x-rar
583>44	byte		x		v%0x,
584>10	byte		>0		flags:
585>>10	byte		&0x01		Archive volume,
586>>10	byte		&0x02		Commented,
587>>10	byte		&0x04		Locked,
588>>10	byte		&0x08		Solid,
589>>10	byte		&0x20		Authenticated,
590>35	byte		0		os: MS-DOS
591>35	byte		1		os: OS/2
592>35	byte		2		os: Win32
593>35	byte		3		os: Unix
594# some old version? idarc says:
5950   string      RE\x7e\x5e  RAR archive data
596
597# SQUISH archiver (Greg Roelofs, newt@uchicago.edu)
5980	string		SQSH		squished archive data (Acorn RISCOS)
599
600# UC2 archiver (Greg Roelofs, newt@uchicago.edu)
601# [JW] see exe section for self-extracting version
6020	string		UC2\x1a		UC2 archive data
603
604# PKZIP multi-volume archive
6050	string		PK\x07\x08PK\x03\x04	Zip multi-volume archive data, at least PKZIP v2.50 to extract
606!:mime	application/zip
607
608# Zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
6090	string		PK\005\006	Zip archive data (empty)
6100	string		PK\003\004
611
612# Specialised zip formats which start with a member named 'mimetype'
613# (stored uncompressed, with no 'extra field') containing the file's MIME type.
614# Check for an 8-byte name, a 0-byte extra field, the name "mimetype", and
615#  contents starting with "application/":
616>26	string		\x8\0\0\0mimetypeapplication/
617
618#  KOffice / OpenOffice & StarOffice / OpenDocument formats
619#    From: Abel Cheung <abel@oaka.org>
620
621#   KOffice (1.2 or above) formats
622#    (mimetype contains "application/vnd.kde.<SUBTYPE>")
623>>50	string	vnd.kde.		KOffice (>=1.2)
624>>>58	string	karbon			Karbon document
625>>>58	string	kchart			KChart document
626>>>58	string	kformula		KFormula document
627>>>58	string	kivio			Kivio document
628>>>58	string	kontour			Kontour document
629>>>58	string	kpresenter		KPresenter document
630>>>58	string	kspread			KSpread document
631>>>58	string	kword			KWord document
632
633#   OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7)
634#    (mimetype contains "application/vnd.sun.xml.<SUBTYPE>")
635>>50	string	vnd.sun.xml.		OpenOffice.org 1.x
636>>>62	string	writer			Writer
637>>>>68	byte	!0x2e			document
638>>>>68	string	.template		template
639>>>>68	string	.global			global document
640>>>62	string	calc			Calc
641>>>>66	byte	!0x2e			spreadsheet
642>>>>66	string	.template		template
643>>>62	string	draw			Draw
644>>>>66	byte	!0x2e			document
645>>>>66	string	.template		template
646>>>62	string	impress			Impress
647>>>>69	byte	!0x2e			presentation
648>>>>69	string	.template		template
649>>>62	string	math			Math document
650>>>62	string	base			Database file
651
652#   OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
653#    http://lists.oasis-open.org/archives/office/200505/msg00006.html
654#    (mimetype contains "application/vnd.oasis.opendocument.<SUBTYPE>")
655>>50	string	vnd.oasis.opendocument.	OpenDocument
656>>>73	string	text
657>>>>77	byte	!0x2d			Text
658!:mime	application/vnd.oasis.opendocument.text
659>>>>77	string	-template		Text Template
660!:mime	application/vnd.oasis.opendocument.text-template
661>>>>77	string	-web			HTML Document Template
662!:mime	application/vnd.oasis.opendocument.text-web
663>>>>77	string	-master			Master Document
664!:mime	application/vnd.oasis.opendocument.text-master
665>>>73	string	graphics
666>>>>81	byte	!0x2d			Drawing
667!:mime	application/vnd.oasis.opendocument.graphics
668>>>>81	string	-template		Template
669!:mime	application/vnd.oasis.opendocument.graphics-template
670>>>73	string	presentation
671>>>>85	byte	!0x2d			Presentation
672!:mime	application/vnd.oasis.opendocument.presentation
673>>>>85	string	-template		Template
674!:mime	application/vnd.oasis.opendocument.presentation-template
675>>>73	string	spreadsheet
676>>>>84	byte	!0x2d			Spreadsheet
677!:mime	application/vnd.oasis.opendocument.spreadsheet
678>>>>84	string	-template		Template
679!:mime	application/vnd.oasis.opendocument.spreadsheet-template
680>>>73	string	chart
681>>>>78	byte	!0x2d			Chart
682!:mime	application/vnd.oasis.opendocument.chart
683>>>>78	string	-template		Template
684!:mime	application/vnd.oasis.opendocument.chart-template
685>>>73	string	formula
686>>>>80	byte	!0x2d			Formula
687!:mime	application/vnd.oasis.opendocument.formula
688>>>>80	string	-template		Template
689!:mime	application/vnd.oasis.opendocument.formula-template
690>>>73	string	database		Database
691!:mime	application/vnd.oasis.opendocument.database
692>>>73	string	image
693>>>>78	byte	!0x2d			Image
694!:mime	application/vnd.oasis.opendocument.image
695>>>>78	string	-template		Template
696!:mime	application/vnd.oasis.opendocument.image-template
697
698#  EPUB (OEBPS) books using OCF (OEBPS Container Format)
699#    http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm, section 4.
700#    From: Ralf Brown <ralf.brown@gmail.com>
701>>50	string	epub+zip	EPUB document
702!:mime application/epub+zip
703
704#  Catch other ZIP-with-mimetype formats
705#	In a ZIP file, the bytes immediately after a member's contents are
706#	always "PK". The 2 regex rules here print the "mimetype" member's
707#	contents up to the first 'P'. Luckily, most MIME types don't contain
708#	any capital 'P's. This is a kludge.
709#    (mimetype contains "application/<OTHER>")
710>>50		string	!epub+zip
711>>>50		string	!vnd.oasis.opendocument.
712>>>>50		string	!vnd.sun.xml.
713>>>>>50		string	!vnd.kde.
714>>>>>>38	regex	[!-OQ-~]+		Zip data (MIME type "%s"?)
715!:mime	application/zip
716#    (mimetype contents other than "application/*")
717>26		string	\x8\0\0\0mimetype
718>>38		string	!application/
719>>>38		regex	[!-OQ-~]+		Zip data (MIME type "%s"?)
720!:mime	application/zip
721
722# Java Jar files
# (26.s+30) is an indirect offset: the little-endian short stored at offset 26
# plus 30.  Given the layout checked by the "mimetype" tests above (name length
# at 26, extra-field length at 28, name at 30), this points just past the first
# member's file name; a 0xcafe value there is taken as the JAR marker.
723>(26.s+30)	leshort	0xcafe		Java archive data (JAR)
724!:mime	application/java-archive
725
726# Generic zip archives (Greg Roelofs, c/o zip-bugs@wkuvx1.wku.edu)
727#   Next line excludes specialized formats:
#   The description hangs off the second test, so it is printed only when the
#   first member neither carries the JAR marker nor is named "mimetype".
728>(26.s+30)	leshort	!0xcafe
729>>26    string          !\x8\0\0\0mimetype	Zip archive data
730!:mime	application/zip
#   Byte 4 is matched against known version codes: the value is the version
#   times ten (0x14 = 20 -> v2.0, 0x2d = 45 -> v4.5).
731>>>4	byte		0x09		\b, at least v0.9 to extract
732>>>4	byte		0x0a		\b, at least v1.0 to extract
733>>>4	byte		0x0b		\b, at least v1.1 to extract
734>>>4	byte		0x14		\b, at least v2.0 to extract
735>>>4	byte		0x2d		\b, at least v4.5 to extract
736>>>0x161	string		WINZIP		\b, WinZIP self-extracting
737
738# StarView Metafile
739# From Pierre Ducroquet <pinaraf@pinaraf.info>
7400	string	VCLMTF	StarView MetaFile
741>6	beshort	x	\b, version %d
742>8	belong	x	\b, size %d
743
744# Zoo archiver
74520	lelong		0xfdc4a7dc	Zoo archive data
746!:mime	application/x-zoo
747>4	byte		>48		\b, v%c.
748>>6	byte		>47		\b%c
749>>>7	byte		>47		\b%c
750>32	byte		>0		\b, modify: v%d
751>>33	byte		x		\b.%d+
752>42	lelong		0xfdc4a7dc	\b,
753>>70	byte		>0		extract: v%d
754>>>71	byte		x		\b.%d+
755
756# Shell archives
75710	string		#\ This\ is\ a\ shell\ archive	shell archive text
758!:mime	application/octet-stream
759
760#
761# LBR. NB: May conflict with the questionable
762#          "binary Computer Graphics Metafile" format.
763#
7640       string  \0\ \ \ \ \ \ \ \ \ \ \ \0\0    LBR archive data
765#
766# PMA (CP/M derivative of LHA)
767#
7682       string          -pm0-           PMarc archive data [pm0]
7692       string          -pm1-           PMarc archive data [pm1]
7702       string          -pm2-           PMarc archive data [pm2]
7712       string          -pms-           PMarc SFX archive (CP/M, DOS)
7725       string          -pc1-           PopCom compressed executable (CP/M)
773
774# From Rafael Laboissiere <rafael@laboissiere.net>
775# The Project Revision Control System (see
776# http://prcs.sourceforge.net) generates a packaged project
777# file which is recognized by the following entry:
7780	leshort		0xeb81	PRCS packaged project
779
780# Microsoft cabinets
781# by David Necas (Yeti) <yeti@physics.muni.cz>
782#0	string	MSCF\0\0\0\0	Microsoft cabinet file data,
783#>25	byte	x		v%d
784#>24	byte	x		\b.%d
785# MPi: All CABs have version 1.3, so this is pointless.
786# Better magic in debian-additions.
787
788# GTKtalog catalogs
789# by David Necas (Yeti) <yeti@physics.muni.cz>
7904	string	gtktalog\ 	GTKtalog catalog data,
791>13	string	3		version 3
792>>14	beshort	0x677a		(gzipped)
793>>14	beshort	!0x677a		(not gzipped)
794>13	string	>3		version %s
795
796############################################################################
797# Parity archive reconstruction file, the 'par' file format now used on Usenet.
7980       string          PAR\0	PARity archive data
799>48	leshort		=0	- Index file
800>48	leshort		>0	- file number %d
801
802# Felix von Leitner <felix-file@fefe.de>
8030	string	d8:announce	BitTorrent file
804!:mime	application/x-bittorrent
805
806# Atari MSA archive - Teemu Hukkanen <tjhukkan@iki.fi>
8070	beshort 0x0e0f		Atari MSA archive data
808>2	beshort x		\b, %d sectors per track
809>4	beshort 0		\b, 1 sided
810>4	beshort 1		\b, 2 sided
811>6	beshort x		\b, starting track: %d
812>8	beshort x		\b, ending track: %d
813
814# Alternate ZIP string (amc@arwen.cs.berkeley.edu)
8150	string	PK00PK\003\004	Zip archive data
816
817# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
818# by Stefan `Sec` Zehl <sec@42.org>
8197	string		**ACE**		ACE archive data
820>15	byte	>0		version %d
821>16	byte	=0x00		\b, from MS-DOS
822>16	byte	=0x01		\b, from OS/2
823>16	byte	=0x02		\b, from Win/32
824>16	byte	=0x03		\b, from Unix
825>16	byte	=0x04		\b, from MacOS
826>16	byte	=0x05		\b, from WinNT
827>16	byte	=0x06		\b, from Primos
828>16	byte	=0x07		\b, from AppleGS
829>16	byte	=0x08		\b, from Atari
830>16	byte	=0x09		\b, from Vax/VMS
831>16	byte	=0x0A		\b, from Amiga
832>16	byte	=0x0B		\b, from Next
833>14	byte	x		\b, version %d to extract
834>5	leshort &0x0080		\b, multiple volumes,
835>>17	byte	x		\b (part %d),
836>5	leshort &0x0002		\b, contains comment
837>5	leshort	&0x0200		\b, sfx
838>5	leshort	&0x0400		\b, small dictionary
839>5	leshort	&0x0800		\b, multi-volume
840>5	leshort	&0x1000		\b, contains AV-String
841>>30	string	\x16*UNREGISTERED\x20VERSION*	(unregistered)
842>5	leshort &0x2000		\b, with recovery record
843>5	leshort &0x4000		\b, locked
844>5	leshort &0x8000		\b, solid
845# Date in MS-DOS format (whatever that is)
846#>18	lelong	x		Created on
847
848# sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
849# <doj@cubic.org>
8500x1A	string	sfArk		sfArk compressed Soundfont
851>0x15	string	2
852>>0x1	string	>\0		Version %s
853>>0x2A	string	>\0		: %s
854
855# DR-DOS 7.03 Packed File *.??_
8560	string	Packed\ File\ 	Personal NetWare Packed File
857>12	string	x		\b, was "%.12s"
858
859# EET archive
860# From: Tilman Sauerbeck <tilman@code-monkey.de>
8610	belong	0x1ee7ff00	EET archive
862!:mime	application/x-eet
863
864# rzip archives
8650	string	RZIP		rzip compressed data
866>4	byte	x		- version %d
867>5	byte	x		\b.%d
868>6	belong	x		(%d bytes)
869
870# From: "Robert Dale" <robdale@gmail.com>
8710	belong	123		dar archive,
872>4	belong	x		label "%.8x
873>>8	belong	x		%.8x
874>>>12	beshort	x		%.4x"
875>14	byte	0x54		end slice
876>14	beshort	0x4e4e		multi-part
877>14	beshort	0x4e53		multi-part, with -S
878
879# Symbian installation files
880#  http://www.thouky.co.uk/software/psifs/sis.html
881#  http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
8828	lelong	0x10000419	Symbian installation file
883!:mime	application/vnd.symbian.install
884>4	lelong	0x1000006D	(EPOC release 3/4/5)
885>4	lelong	0x10003A12	(EPOC release 6)
8860	lelong	0x10201A7A	Symbian installation file (Symbian OS 9.x)
887!:mime	x-epoc/x-sisx-app
888
889# From "Nelson A. de Oliveira" <naoliv@gmail.com>
8900	string	MPQ\032		MoPaQ (MPQ) archive
891
897# From: "Nelson A. de Oliveira" <naoliv@gmail.com>
898# .kgb
8990	string KGB_arch		KGB Archiver file
900>10	string x		with compression level %.1s
901
902# xar (eXtensible ARchiver) archive
# xar archive format: http://code.google.com/p/xar/
# From: Dirk Jagdmann <doj@cubic.org>
903# From: "David Remahl" <dremahl@apple.com>
# This is the only test for the "xar!" signature.  A second, less detailed
# entry with the identical top-level test used to precede it; it was merged
# here so the signature is described in exactly one place.
9040	string	xar!		xar archive
905#>4	beshort	x		header size %d
# Big-endian 16-bit version field at offset 6.
906>6	beshort	x		version %d,
907#>8	quad	x		compressed TOC: %d,
908#>16	quad	x		uncompressed TOC: %d,
# Big-endian 32-bit checksum-algorithm code at offset 24.
909>24	belong	0		no checksum
910>24	belong	1		SHA-1 checksum
911>24	belong	2		MD5 checksum
912
913# Type: Parity Archive
914# From: Daniel van Eeden <daniel_e@dds.nl>
9150	string	PAR2		Parity Archive Volume Set
916
917# Bacula volume format. (Volumes always start with a block header.)
918# URL: http://bacula.org/3.0.x-manuals/en/developers/developers/Block_Header.html
919# From: Adam Buchbinder <adam.buchbinder@gmail.com>
92012	string	BB02		Bacula volume
921>20	bedate	x		\b, started %s
922
923# ePub is XHTML + XML inside a ZIP archive.  The first member of the
924#   archive must be an uncompressed file called 'mimetype' with contents
925#   'application/epub+zip'; see the "EPUB document" Zip rule earlier in this file.
926
927
928# From: "Michael Gorny" <mgorny@gentoo.org>
929# ZPAQ: http://mattmahoney.net/dc/zpaq.html
9300	string	zPQ	ZPAQ stream
931>3	byte	x	\b, level %d
932# From: Barry Carter <carter.barry@gmail.com>
933# http://encode.ru/threads/456-zpaq-updates/page32
9340	string	7kSt	ZPAQ file
935
936# BBeB ebook, unencrypted (LRF format)
937# URL: http://www.sven.de/librie/Librie/LrfFormat
938# From: Adam Buchbinder <adam.buchbinder@gmail.com>
9390	string	L\0R\0F\0\0\0	BBeB ebook data, unencrypted
940>8	beshort	x		\b, version %d
941>36	byte	1		\b, front-to-back
942>36	byte	16		\b, back-to-front
943>42	beshort	x		\b, (%dx,
944>44	beshort	x		%d)
945
946# Symantec GHOST image by Joerg Jenderek at May 2014
947# http://us.norton.com/ghost/
948# http://www.garykessler.net/library/file_sigs.html
9490		ubelong&0xFFFFf7f0	0xFEEF0100	Norton GHost image
950# *.GHO
951>2		ubyte&0x08		0x00		\b, first file
952# *.GHS or *.[0-9] with cns program option
953>2		ubyte&0x08		0x08		\b, split file
954# part of split index interesting for *.ghs
955>>4		ubyte			x		id=0x%x
956# compression tag minus one equals numeric compression command line switch z[1-9]
957>3		ubyte			0		\b, no compression
958>3		ubyte			2		\b, fast compression (Z1)
959>3		ubyte			3		\b, medium compression (Z2)
960>3		ubyte			>3		
961>>3		ubyte			<11		\b, compression (Z%d-1)
962>2		ubyte&0x08		0x00		
963# ~ 30 byte password field only for *.gho
964>>12		ubequad			!0		\b, password protected
965>>44		ubyte			!1		
966# 1~Image All, sector-by-sector only for *.gho
967>>>10		ubyte			1		\b, sector copy
968# 1~Image Boot track only for *.gho
969>>>43		ubyte			1		\b, boot track
970# 1~Image Disc only for *.gho implies Image Boot track and sector copy
971>>44		ubyte			1		\b, disc sector copy
972# optional image description only *.gho
973>>0xff		string			>\0		"%-.254s"
974# look for DOS sector end sequence
975>0xE08	search/7776		\x55\xAA	
976>>&-512	indirect		x		\b; contains 
977
978