1 # Magic data for mod_mime_magic Apache module (originally for file(1) command)
\r
2 # The module is described in /manual/mod/mod_mime_magic.html
\r
4 # The format is 4-5 columns:
\r
5 # Column #1: byte number to begin checking from, ">" indicates continuation
\r
6 # Column #2: type of data to match
\r
7 # Column #3: contents of data to match
\r
8 # Column #4: MIME type of result
\r
9 # Column #5: MIME encoding of result (optional)
\r
11 #------------------------------------------------------------------------------
\r
12 # Localstuff: file(1) magic for locally observed files
\r
13 # Add any locally observed files here.
\r
15 #------------------------------------------------------------------------------
\r
17 #------------------------------------------------------------------------------
\r
19 #------------------------------------------------------------------------------
\r
23 >2 short 0xbabe application/java
\r
25 #------------------------------------------------------------------------------
\r
26 # audio: file(1) magic for sound formats
\r
28 # from Jan Nicolai Langfeldt <janl@ifi.uio.no>,
\r
31 # Sun/NeXT audio data
\r
33 >12 belong 1 audio/basic
\r
34 >12 belong 2 audio/basic
\r
35 >12 belong 3 audio/basic
\r
36 >12 belong 4 audio/basic
\r
37 >12 belong 5 audio/basic
\r
38 >12 belong 6 audio/basic
\r
39 >12 belong 7 audio/basic
\r
41 >12 belong 23 audio/x-adpcm
\r
43 # DEC systems (e.g. DECstation 5000) use a variant of the Sun/NeXT format
\r
44 # that uses little-endian encoding and has a different magic number
\r
45 # (0x0064732E in little-endian encoding).
\r
46 0 lelong 0x0064732E
\r
47 >12 lelong 1 audio/x-dec-basic
\r
48 >12 lelong 2 audio/x-dec-basic
\r
49 >12 lelong 3 audio/x-dec-basic
\r
50 >12 lelong 4 audio/x-dec-basic
\r
51 >12 lelong 5 audio/x-dec-basic
\r
52 >12 lelong 6 audio/x-dec-basic
\r
53 >12 lelong 7 audio/x-dec-basic
\r
54 # compressed (G.721 ADPCM)
\r
55 >12 lelong 23 audio/x-dec-adpcm
\r
57 # Bytes 0-3 of AIFF, AIFF-C, & 8SVX audio files are "FORM"
\r
59 8 string AIFF audio/x-aiff
\r
61 8 string AIFC audio/x-aiff
\r
62 # IFF/8SVX audio data
\r
63 8 string 8SVX audio/x-aiff
\r
65 # Creative Labs AUDIO stuff
\r
66 # Standard MIDI data
\r
67 0 string MThd audio/unknown
\r
68 #>9 byte >0 (format %d)
\r
69 #>11 byte >1 using %d channels
\r
70 # Creative Music (CMF) data
\r
71 0 string CTMF audio/unknown
\r
72 # SoundBlaster instrument data
\r
73 0 string SBI audio/unknown
\r
74 # Creative Labs voice data
\r
75 0 string Creative\ Voice\ File audio/unknown
\r
76 ## is this next line right? it came this way...
\r
78 #>23 byte >0 - version %d
\r
81 # [GRR 950115: is this also Creative Labs? Guessing that first line
\r
82 # should be string instead of unknown-endian long...]
\r
83 #0 long 0x4e54524b MultiTrack sound data
\r
84 #0 string NTRK MultiTrack sound data
\r
85 #>4 long x - version %ld
\r
87 # Microsoft WAVE format (*.wav)
\r
88 # [GRR 950115: probably all of the shorts and longs should be leshort/lelong]
\r
90 0 string RIFF audio/unknown
\r
92 >8 string WAVE audio/x-wav
\r
94 0 beshort&0xfff0 0xfff0 audio/mpeg
\r
95 # C64 SID Music files, from Linus Walleij <triad@df.lth.se>
\r
96 0 string PSID audio/prs.sid
\r
98 #------------------------------------------------------------------------------
\r
99 # c-lang: file(1) magic for C programs or various scripts
\r
102 # XPM icons (Greg Roelofs, newt@uchicago.edu)
\r
103 # ideally should go into "images", but entries below would tag XPM as C source
\r
104 0 string /*\ XPM image/x-xbm 7bit
\r
106 # this first entry will upset you if you're a PL/1 shop... (are there any left?)
\r
107 # in which case rm it; ascmagic will catch real C programs
\r
108 # C or REXX program text
\r
109 0 string /* text/plain
\r
111 0 string // text/plain
\r
113 #------------------------------------------------------------------------------
\r
114 # compress: file(1) magic for pure-compression formats (no archives)
\r
116 # compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, whap, etc.
\r
118 # Formats for various forms of compressed data
\r
119 # Formats for "compress" proper have been moved into "compress.c",
\r
120 # because it tries to uncompress it to figure out what's inside.
\r
122 # standard unix compress
\r
123 0 string \037\235 application/octet-stream x-compress
\r
125 # gzip (GNU zip, not to be confused with [Info-ZIP/PKWARE] zip archiver)
\r
126 0 string \037\213 application/octet-stream x-gzip
\r
128 # According to gzip.h, this is the correct byte order for packed data.
\r
129 0 string \037\036 application/octet-stream
\r
131 # This magic number is byte-order-independent.
\r
133 0 short 017437 application/octet-stream
\r
135 # XXX - why *two* entries for "compacted data", one of which is
\r
136 # byte-order independent, and one of which is byte-order dependent?
\r
139 0 short 0x1fff application/octet-stream
\r
140 0 string \377\037 application/octet-stream
\r
142 0 short 0145405 application/octet-stream
\r
144 # Squeeze and Crunch...
\r
145 # These numbers were gleaned from the Unix versions of the programs to
\r
146 # handle these formats. Note that I can only uncrunch, not crunch, and
\r
147 # I didn't have a crunched file handy, so the crunch number is untested.
\r
148 # Keith Waclena <keith@cerberus.uchicago.edu>
\r
149 #0 leshort 0x76FF squeezed data (CP/M, DOS)
\r
150 #0 leshort 0x76FE crunched data (CP/M, DOS)
\r
153 #0 string \037\237 Frozen file 2.1
\r
154 #0 string \037\236 Frozen file 1.0 (or gzip 0.5)
\r
157 #0 string \037\240 LZH compressed data
\r
159 #------------------------------------------------------------------------------
\r
160 # frame: file(1) magic for FrameMaker files
\r
162 # This stuff came on a FrameMaker demo tape, most of which is
\r
163 # copyrighted, but this file is "published", as witnessed by the following:
\r
165 0 string \<MakerFile application/x-frame
\r
166 0 string \<MIFFile application/x-frame
\r
167 0 string \<MakerDictionary application/x-frame
\r
168 0 string \<MakerScreenFon application/x-frame
\r
169 0 string \<MML application/x-frame
\r
170 0 string \<Book application/x-frame
\r
171 0 string \<Maker application/x-frame
\r
173 #------------------------------------------------------------------------------
\r
174 # html: file(1) magic for HTML (HyperText Markup Language) docs
\r
176 # from Daniel Quinlan <quinlan@yggdrasil.com>
\r
177 # and Anna Shergold <anna@inext.co.uk>
\r
179 0 string \<!DOCTYPE\ HTML text/html
\r
180 0 string \<!doctype\ html text/html
\r
181 0 string \<HEAD text/html
\r
182 0 string \<head text/html
\r
183 0 string \<TITLE text/html
\r
184 0 string \<title text/html
\r
185 0 string \<html text/html
\r
186 0 string \<HTML text/html
\r
187 0 string \<!-- text/html
\r
188 0 string \<h1 text/html
\r
189 0 string \<H1 text/html
\r
191 # XML eXtensible Markup Language, from Linus Walleij <triad@df.lth.se>
\r
192 0 string \<?xml text/xml
\r
194 #------------------------------------------------------------------------------
\r
195 # images: file(1) magic for image formats (see also "c-lang" for XPM bitmaps)
\r
197 # originally from jef@helios.ee.lbl.gov (Jef Poskanzer),
\r
198 # additions by janl@ifi.uio.no as well as others. Jan also suggested
\r
199 # merging several one- and two-line files into here.
\r
201 # XXX - byte order for GIF and TIFF fields?
\r
202 # [GRR: TIFF allows both byte orders; GIF is probably little-endian]
\r
205 # [GRR: what the hell is this doing in here?]
\r
206 #0 string xbtoa btoa'd file
\r
210 0 string P1 image/x-portable-bitmap 7bit
\r
212 0 string P2 image/x-portable-greymap 7bit
\r
214 0 string P3 image/x-portable-pixmap 7bit
\r
215 # PBM "rawbits" file
\r
216 0 string P4 image/x-portable-bitmap
\r
217 # PGM "rawbits" file
\r
218 0 string P5 image/x-portable-greymap
\r
219 # PPM "rawbits" file
\r
220 0 string P6 image/x-portable-pixmap
\r
222 # NIFF (Navy Interchange File Format, a modification of TIFF)
\r
223 # [GRR: this *must* go before TIFF]
\r
224 0 string IIN1 image/x-niff
\r
227 # TIFF file, big-endian
\r
228 0 string MM image/tiff
\r
229 # TIFF file, little-endian
\r
230 0 string II image/tiff
\r
232 # possible GIF replacements; none yet released!
\r
233 # (Greg Roelofs, newt@uchicago.edu)
\r
235 # GRR 950115: this was mine ("Zip GIF"):
\r
236 # ZIF image (GIF+deflate alpha)
\r
237 0 string GIF94z image/unknown
\r
239 # GRR 950115: this is Jeremy Wohl's Free Graphics Format (better):
\r
240 # FGF image (GIF+deflate beta)
\r
241 0 string FGF95a image/unknown
\r
243 # GRR 950115: this is Thomas Boutell's Portable Bitmap Format proposal
\r
244 # (best; not yet implemented):
\r
245 # PBF image (deflate compression)
\r
246 0 string PBF image/unknown
\r
249 0 string GIF image/gif
\r
252 0 beshort 0xffd8 image/jpeg
\r
254 # PC bitmaps (OS/2, Windoze BMP files) (Greg Roelofs, newt@uchicago.edu)
\r
255 0 string BM image/bmp
\r
256 #>14 byte 12 (OS/2 1.x format)
\r
257 #>14 byte 64 (OS/2 2.x format)
\r
258 #>14 byte 40 (Windows 3.x format)
\r
260 #0 string PI pointer
\r
261 #0 string CI color icon
\r
262 #0 string CP color pointer
\r
263 #0 string BA bitmap array
\r
266 #------------------------------------------------------------------------------
\r
267 # lisp: file(1) magic for lisp programs
\r
269 # various lisp types, from Daniel Quinlan (quinlan@yggdrasil.com)
\r
270 0 string ;; text/plain 8bit
\r
271 # Emacs 18 - this is always correct, but not very magical.
\r
272 0 string \012( application/x-elc
\r
274 0 string ;ELC\023\000\000\000 application/x-elc
\r
276 #------------------------------------------------------------------------------
\r
277 # mail.news: file(1) magic for mail and news
\r
279 # There are tests in ascmagic.c to cope with mail and news.
\r
280 0 string Relay-Version: message/rfc822 7bit
\r
281 0 string #!\ rnews message/rfc822 7bit
\r
282 0 string N#!\ rnews message/rfc822 7bit
\r
283 0 string Forward\ to message/rfc822 7bit
\r
284 0 string Pipe\ to message/rfc822 7bit
\r
285 0 string Return-Path: message/rfc822 7bit
\r
286 0 string Path: message/news 8bit
\r
287 0 string Xref: message/news 8bit
\r
288 0 string From: message/rfc822 7bit
\r
289 0 string Article message/news 8bit
\r
290 #------------------------------------------------------------------------------
\r
291 # msword: file(1) magic for MS Word files
\r
293 # Contributor claims:
\r
294 # Reverse-engineered MS Word magic numbers
\r
297 0 string \376\067\0\043 application/msword
\r
298 0 string \333\245-\0\0\0 application/msword
\r
300 # disable this one because it applies also to other
\r
301 # Office/OLE documents for which msword is not correct. See PR#2608.
\r
302 #0 string \320\317\021\340\241\261 application/msword
\r
306 #------------------------------------------------------------------------------
\r
307 # printer: file(1) magic for printer-formatted files
\r
311 0 string %! application/postscript
\r
312 0 string \004%! application/postscript
\r
315 # (due to clamen@cs.cmu.edu)
\r
316 0 string %PDF- application/pdf
\r
318 #------------------------------------------------------------------------------
\r
319 # sc: file(1) magic for "sc" spreadsheet
\r
321 38 string Spreadsheet application/x-sc
\r
323 #------------------------------------------------------------------------------
\r
324 # tex: file(1) magic for TeX files
\r
326 # XXX - needs byte-endian stuff (big-endian and little-endian DVI?)
\r
328 # From <conklin@talisman.kaleida.com>
\r
330 # Although we may know the offset of certain text fields in TeX DVI
\r
331 # and font files, we can't use them reliably because they are not
\r
332 # zero terminated. [but we do anyway, christos]
\r
333 0 string \367\002 application/x-dvi
\r
334 #0 string \367\203 TeX generic font data
\r
335 #0 string \367\131 TeX packed font data
\r
336 #0 string \367\312 TeX virtual font data
\r
337 #0 string This\ is\ TeX, TeX transcript text
\r
338 #0 string This\ is\ METAFONT, METAFONT transcript text
\r
340 # There is no way to detect TeX Font Metric (*.tfm) files without
\r
341 # breaking them apart and reading the data. The following patterns
\r
342 # match most *.tfm files generated by METAFONT or afm2tfm.
\r
343 #2 string \000\021 TeX font metric data
\r
344 #2 string \000\022 TeX font metric data
\r
345 #>34 string >\0 (%s)
\r
347 # Texinfo and GNU Info, from Daniel Quinlan (quinlan@yggdrasil.com)
\r
348 #0 string \\input\ texinfo Texinfo source text
\r
349 #0 string This\ is\ Info\ file GNU Info text
\r
351 # correct TeX magic for Linux (and maybe more)
\r
352 # from Peter Tobias (tobias@server.et-inf.fho-emden.de)
\r
354 0 leshort 0x02f7 application/x-dvi
\r
356 # RTF - Rich Text Format
\r
357 0 string {\\rtf application/rtf
\r
359 #------------------------------------------------------------------------------
\r
360 # animation: file(1) magic for animation/movie formats
\r
362 # animation formats, originally from vax@ccwf.cc.utexas.edu (VaX#n8)
\r
364 0 string \000\000\001\263 video/mpeg
\r
366 # The contributor claims:
\r
367 # I couldn't find a real magic number for these, however, this
\r
368 # -appears- to work. Note that it might catch other files, too,
\r
371 # Note that title and author appear in the two 20-byte chunks
\r
372 # at decimal offsets 2 and 22, respectively, but they are XOR'ed with
\r
373 # 255 (hex FF)! DL format SUCKS BIG ROCKS.
\r
375 # DL file version 1, medium format (160x100, 4 images/screen)
\r
376 0 byte 1 video/unknown
\r
377 0 byte 2 video/unknown
\r
378 # Quicktime video, from Linus Walleij <triad@df.lth.se>
\r
379 # from Apple quicktime file format documentation.
\r
380 4 string moov video/quicktime
\r
381 4 string mdat video/quicktime
\r