1# Magic data for mod_mime_magic Apache module (originally for file(1) command)
2# The module is described in /manual/mod/mod_mime_magic.html
3#
4# The format is 4-5 columns:
5#    Column #1: byte number to begin checking from, ">" indicates continuation
6#    Column #2: type of data to match
7#    Column #3: contents of data to match
8#    Column #4: MIME type of result
9#    Column #5: MIME encoding of result (optional)
10
11#------------------------------------------------------------------------------
12# Localstuff:  file(1) magic for locally observed files
13# Add any locally observed files here.
14
15#------------------------------------------------------------------------------
16# end local stuff
17#------------------------------------------------------------------------------
18
19#------------------------------------------------------------------------------
20# Java
21
220	short		0xcafe
23>2	short		0xbabe		application/java
24
25#------------------------------------------------------------------------------
26# audio:  file(1) magic for sound formats
27#
28# from Jan Nicolai Langfeldt <janl@ifi.uio.no>,
29#
30
31# Sun/NeXT audio data
320	string		.snd
33>12	belong		1		audio/basic
34>12	belong		2		audio/basic
35>12	belong		3		audio/basic
36>12	belong		4		audio/basic
37>12	belong		5		audio/basic
38>12	belong		6		audio/basic
39>12	belong		7		audio/basic
40
41>12	belong		23		audio/x-adpcm
42
43# DEC systems (e.g. DECstation 5000) use a variant of the Sun/NeXT format
44# that uses little-endian encoding and has a different magic number
45# (0x0064732E in little-endian encoding).
460	lelong		0x0064732E	
47>12	lelong		1		audio/x-dec-basic
48>12	lelong		2		audio/x-dec-basic
49>12	lelong		3		audio/x-dec-basic
50>12	lelong		4		audio/x-dec-basic
51>12	lelong		5		audio/x-dec-basic
52>12	lelong		6		audio/x-dec-basic
53>12	lelong		7		audio/x-dec-basic
54#                                       compressed (G.721 ADPCM)
55>12	lelong		23		audio/x-dec-adpcm
56
57# Bytes 0-3 of AIFF, AIFF-C, & 8SVX audio files are "FORM"
58#					AIFF audio data
598	string		AIFF		audio/x-aiff	
60#					AIFF-C audio data
618	string		AIFC		audio/x-aiff	
62#					IFF/8SVX audio data
638	string		8SVX		audio/x-aiff	
64
65# Creative Labs AUDIO stuff
66#					Standard MIDI data
670	string	MThd			audio/unknown	
68#>9 	byte	>0			(format %d)
69#>11	byte	>1			using %d channels
70#					Creative Music (CMF) data
710	string	CTMF			audio/unknown	
72#					SoundBlaster instrument data
730	string	SBI			audio/unknown	
74#					Creative Labs voice data
750	string	Creative\ Voice\ File	audio/unknown	
76## is this next line right?  it came this way...
77#>19	byte	0x1A
78#>23	byte	>0			- version %d
79#>22	byte	>0			\b.%d
80
81# [GRR 950115:  is this also Creative Labs?  Guessing that first line
82#  should be string instead of unknown-endian long...]
83#0	long		0x4e54524b	MultiTrack sound data
84#0	string		NTRK		MultiTrack sound data
85#>4	long		x		- version %ld
86
87# Microsoft WAVE format (*.wav)
88# [GRR 950115:  probably all of the shorts and longs should be leshort/lelong]
89#					Microsoft RIFF container: no result here, the continuation names the type
900	string		RIFF
91#					- WAVE format
92>8	string		WAVE		audio/x-wav
93# MPEG audio.
940   beshort&0xfff0  0xfff0  audio/mpeg
95# C64 SID Music files, from Linus Walleij <triad@df.lth.se>
960   string      PSID        audio/prs.sid
97
98#------------------------------------------------------------------------------
99# c-lang:  file(1) magic for C programs or various scripts
100#
101
102# XPM icons (Greg Roelofs, newt@uchicago.edu)
103# ideally should go into "images", but entries below would tag XPM as C source
1040	string		/*\ XPM		image/x-xpixmap	7bit
105
106# this first will upset you if you're a PL/1 shop... (are there any left?)
107# in which case rm it; ascmagic will catch real C programs
108#					C or REXX program text
1090	string		/*		text/plain
110#					C++ program text
1110	string		//		text/plain
112
113#------------------------------------------------------------------------------
114# compress:  file(1) magic for pure-compression formats (no archives)
115#
116# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, whap, etc.
117#
118# Formats for various forms of compressed data
119# Formats for "compress" proper have been moved into "compress.c",
120# because it tries to uncompress it to figure out what's inside.
121
122# standard unix compress
1230	string		\037\235	application/octet-stream	x-compress
124
125# gzip (GNU zip, not to be confused with [Info-ZIP/PKWARE] zip archiver)
1260       string          \037\213        application/octet-stream	x-gzip
127
128# According to gzip.h, this is the correct byte order for packed data.
1290	string		\037\036	application/octet-stream
130#
131# This magic number is byte-order-independent.
132#
1330	short		017437		application/octet-stream
134
135# XXX - why *two* entries for "compacted data", one of which is
136# byte-order independent, and one of which is byte-order dependent?
137#
138# compacted data
1390	short		0x1fff		application/octet-stream
1400	string		\377\037	application/octet-stream
141# huf output
1420	short		0145405		application/octet-stream
143
144# Squeeze and Crunch...
145# These numbers were gleaned from the Unix versions of the programs to
146# handle these formats.  Note that I can only uncrunch, not crunch, and
147# I didn't have a crunched file handy, so the crunch number is untested.
148#				Keith Waclena <keith@cerberus.uchicago.edu>
149#0	leshort		0x76FF		squeezed data (CP/M, DOS)
150#0	leshort		0x76FE		crunched data (CP/M, DOS)
151
152# Freeze
153#0	string		\037\237	Frozen file 2.1
154#0	string		\037\236	Frozen file 1.0 (or gzip 0.5)
155
156# lzh?
157#0	string		\037\240	LZH compressed data
158
159#------------------------------------------------------------------------------
160# frame:  file(1) magic for FrameMaker files
161#
162# This stuff came on a FrameMaker demo tape, most of which is
163# copyright, but this file is "published" as witness the following:
164#
1650	string		\<MakerFile	application/x-frame
1660	string		\<MIFFile	application/x-frame
1670	string		\<MakerDictionary	application/x-frame
1680	string		\<MakerScreenFon	application/x-frame
1690	string		\<MML		application/x-frame
1700	string		\<Book		application/x-frame
1710	string		\<Maker		application/x-frame
172
173#------------------------------------------------------------------------------
174# html:  file(1) magic for HTML (HyperText Markup Language) docs
175#
176# from Daniel Quinlan <quinlan@yggdrasil.com>
177# and Anna Shergold <anna@inext.co.uk>
178#
1790   string      \<!DOCTYPE\ HTML    text/html
1800   string      \<!doctype\ html    text/html
1810   string      \<HEAD      text/html
1820   string      \<head      text/html
1830   string      \<TITLE     text/html
1840   string      \<title     text/html
1850   string      \<html      text/html
1860   string      \<HTML      text/html
1870   string      \<!--       text/html
1880   string      \<h1        text/html
1890   string      \<H1        text/html
190
191# XML eXtensible Markup Language, from Linus Walleij <triad@df.lth.se>
1920   string      \<?xml      text/xml
193
194#------------------------------------------------------------------------------
195# images:  file(1) magic for image formats (see also "c-lang" for XPM bitmaps)
196#
197# originally from jef@helios.ee.lbl.gov (Jef Poskanzer),
198# additions by janl@ifi.uio.no as well as others. Jan also suggested
199# merging several one- and two-line files into here.
200#
201# XXX - byte order for GIF and TIFF fields?
202# [GRR:  TIFF allows both byte orders; GIF is probably little-endian]
203#
204
205# [GRR:  what the hell is this doing in here?]
206#0	string		xbtoa		btoa'd file
207
208# PBMPLUS
209#					PBM file
2100	string		P1		image/x-portable-bitmap	7bit
211#					PGM file
2120	string		P2		image/x-portable-greymap	7bit
213#					PPM file
2140	string		P3		image/x-portable-pixmap	7bit
215#					PBM "rawbits" file
2160	string		P4		image/x-portable-bitmap
217#					PGM "rawbits" file
2180	string		P5		image/x-portable-greymap
219#					PPM "rawbits" file
2200	string		P6		image/x-portable-pixmap
221
222# NIFF (Navy Interchange File Format, a modification of TIFF)
223# [GRR:  this *must* go before TIFF]
2240	string		IIN1		image/x-niff
225
226# TIFF and friends
227#					TIFF file, big-endian
2280	string		MM		image/tiff
229#					TIFF file, little-endian
2300	string		II		image/tiff
231
232# possible GIF replacements; none yet released!
233# (Greg Roelofs, newt@uchicago.edu)
234#
235# GRR 950115:  this was mine ("Zip GIF"):
236#					ZIF image (GIF+deflate alpha)
2370	string		GIF94z		image/unknown
238#
239# GRR 950115:  this is Jeremy Wohl's Free Graphics Format (better):
240#					FGF image (GIF+deflate beta)
2410	string		FGF95a		image/unknown
242#
243# GRR 950115:  this is Thomas Boutell's Portable Bitmap Format proposal
244# (best; not yet implemented):
245#					PBF image (deflate compression)
2460	string		PBF		image/unknown
247
248# GIF
2490	string		GIF		image/gif
250
251# JPEG images
2520	beshort		0xffd8		image/jpeg
253
254# PC bitmaps (OS/2, Windoze BMP files)  (Greg Roelofs, newt@uchicago.edu)
2550	string		BM		image/bmp
256#>14	byte		12		(OS/2 1.x format)
257#>14	byte		64		(OS/2 2.x format)
258#>14	byte		40		(Windows 3.x format)
259#0	string		IC		icon
260#0	string		PI		pointer
261#0	string		CI		color icon
262#0	string		CP		color pointer
263#0	string		BA		bitmap array
264
2650	string		\x89PNG		image/png
2660	string		FWS		application/x-shockwave-flash
2670	string		CWS		application/x-shockwave-flash
268
269#------------------------------------------------------------------------------
270# lisp:  file(1) magic for lisp programs
271#
272# various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com)
2730	string	;;			text/plain	8bit
274# Emacs 18 - this is always correct, but not very magical.
2750	string	\012(			application/x-elc
276# Emacs 19
2770	string	;ELC\023\000\000\000	application/x-elc
278
279#------------------------------------------------------------------------------
280# mail.news:  file(1) magic for mail and news
281#
282# There are tests in ascmagic.c to cope with mail and news.
2830	string		Relay-Version: 	message/rfc822	7bit
2840	string		#!\ rnews	message/rfc822	7bit
2850	string		N#!\ rnews	message/rfc822	7bit
2860	string		Forward\ to 	message/rfc822	7bit
2870	string		Pipe\ to 	message/rfc822	7bit
2880	string		Return-Path:	message/rfc822	7bit
2890	string		Path:		message/news	8bit
2900	string		Xref:		message/news	8bit
2910	string		From:		message/rfc822	7bit
2920	string		Article 	message/news	8bit
293#------------------------------------------------------------------------------
294# msword: file(1) magic for MS Word files
295#
296# Contributor claims:
297# Reverse-engineered MS Word magic numbers
298#
299
3000	string		\376\067\0\043			application/msword
3010	string		\333\245-\0\0\0			application/msword
302
303# disable this one because it applies also to other
304# Office/OLE documents for which msword is not correct. See PR#2608.
305#0	string		\320\317\021\340\241\261	application/msword
306
307
308
309#------------------------------------------------------------------------------
310# printer:  file(1) magic for printer-formatted files
311#
312
313# PostScript
3140	string		%!		application/postscript
3150	string		\004%!		application/postscript
316
317# Acrobat
318# (due to clamen@cs.cmu.edu)
3190	string		%PDF-		application/pdf
320
321#------------------------------------------------------------------------------
322# sc:  file(1) magic for "sc" spreadsheet
323#
32438	string		Spreadsheet	application/x-sc
325
326#------------------------------------------------------------------------------
327# tex:  file(1) magic for TeX files
328#
329# XXX - needs byte-endian stuff (big-endian and little-endian DVI?)
330#
331# From <conklin@talisman.kaleida.com>
332
333# Although we may know the offset of certain text fields in TeX DVI
334# and font files, we can't use them reliably because they are not
335# zero terminated. [but we do anyway, christos]
3360	string		\367\002	application/x-dvi
337#0	string		\367\203	TeX generic font data
338#0	string		\367\131	TeX packed font data
339#0	string		\367\312	TeX virtual font data
340#0	string		This\ is\ TeX,	TeX transcript text	
341#0	string		This\ is\ METAFONT,	METAFONT transcript text
342
343# There is no way to detect TeX Font Metric (*.tfm) files without
344# breaking them apart and reading the data.  The following patterns
345# match most *.tfm files generated by METAFONT or afm2tfm.
346#2	string		\000\021	TeX font metric data
347#2	string		\000\022	TeX font metric data
348#>34	string		>\0		(%s)
349
350# Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com)
351#0	string		\\input\ texinfo	Texinfo source text
352#0	string		This\ is\ Info\ file	GNU Info text
353
354# correct TeX magic for Linux (and maybe more)
355# from Peter Tobias (tobias@server.et-inf.fho-emden.de)
356#
3570	leshort		0x02f7		application/x-dvi
358
359# RTF - Rich Text Format
3600	string		{\\rtf		application/rtf
361
362#------------------------------------------------------------------------------
363# animation:  file(1) magic for animation/movie formats
364#
365# animation formats, originally from vax@ccwf.cc.utexas.edu (VaX#n8)
366#						MPEG file
3670	string		\000\000\001\263	video/mpeg
368#
369# The contributor claims:
370#   I couldn't find a real magic number for these, however, this
371#   -appears- to work.  Note that it might catch other files, too,
372#   so BE CAREFUL!
373#
374# Note that title and author appear in the two 20-byte chunks
375# at decimal offsets 2 and 22, respectively, but they are XOR'ed with
376# 255 (hex FF)! DL format SUCKS BIG ROCKS.
377#
378#						DL file version 1, medium format (160x100, 4 images/screen)
3790	byte		1			video/unknown
3800	byte		2			video/unknown
381# Quicktime video, from Linus Walleij <triad@df.lth.se>
382# from Apple quicktime file format documentation.
3834   string      moov        video/quicktime
3844   string      mdat        video/quicktime
385
386