1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * mod_mime_magic: MIME type lookup via file magic numbers
19 * Copyright (c) 1996-1997 Cisco Systems, Inc.
20 *
21 * This software was submitted by Cisco Systems to the Apache Software Foundation in July
22 * 1997.  Future revisions and derivatives of this source code must
23 * acknowledge Cisco Systems as the original contributor of this module.
24 * All other licensing and usage conditions are those of the Apache Software Foundation.
25 *
26 * Some of this code is derived from the free version of the file command
27 * originally posted to comp.sources.unix.  Copyright info for that program
28 * is included below as required.
29 * ---------------------------------------------------------------------------
30 * - Copyright (c) Ian F. Darwin, 1987. Written by Ian F. Darwin.
31 *
32 * This software is not subject to any license of the American Telephone and
33 * Telegraph Company or of the Regents of the University of California.
34 *
35 * Permission is granted to anyone to use this software for any purpose on any
36 * computer system, and to alter it and redistribute it freely, subject to
37 * the following restrictions:
38 *
39 * 1. The author is not responsible for the consequences of use of this
40 * software, no matter how awful, even if they arise from flaws in it.
41 *
42 * 2. The origin of this software must not be misrepresented, either by
43 * explicit claim or by omission.  Since few users ever read sources, credits
44 * must appear in the documentation.
45 *
46 * 3. Altered versions must be plainly marked as such, and must not be
47 * misrepresented as being the original software.  Since few users ever read
48 * sources, credits must appear in the documentation.
49 *
50 * 4. This notice may not be removed or altered.
51 * -------------------------------------------------------------------------
52 *
53 * For compliance with Mr Darwin's terms: this has been very significantly
54 * modified from the free "file" command.
55 * - all-in-one file for compilation convenience when moving from one
56 *   version of Apache to the next.
57 * - Memory allocation is done through the Apache API's apr_pool_t structure.
58 * - All functions have had necessary Apache API request or server
59 *   structures passed to them where necessary to call other Apache API
60 *   routines.  (i.e. usually for logging, files, or memory allocation in
61 *   itself or a called function.)
62 * - struct magic has been converted from an array to a single-ended linked
63 *   list because it only grows one record at a time, it's only accessed
64 *   sequentially, and the Apache API has no equivalent of realloc().
65 * - Functions have been changed to get their parameters from the server
66 *   configuration instead of globals.  (It should be reentrant now but has
67 *   not been tested in a threaded environment.)
68 * - Places where it used to print results to stdout now saves them in a
69 *   list where they're used to set the MIME type in the Apache request
70 *   record.
71 * - Command-line flags have been removed since they will never be used here.
72 *
73 * Ian Kluft <ikluft@cisco.com>
74 * Engineering Information Framework
75 * Central Engineering
76 * Cisco Systems, Inc.
77 * San Jose, CA, USA
78 *
79 * Initial installation          July/August 1996
80 * Misc bug fixes                May 1997
81 * Submission to Apache Software Foundation    July 1997
82 *
83 */
84
85#include "apr.h"
86#include "apr_strings.h"
87#include "apr_lib.h"
88#define APR_WANT_STRFUNC
89#include "apr_want.h"
90
91#if APR_HAVE_UNISTD_H
92#include <unistd.h>
93#endif
94
95#include "ap_config.h"
96#include "httpd.h"
97#include "http_config.h"
98#include "http_request.h"
99#include "http_core.h"
100#include "http_log.h"
101#include "http_protocol.h"
102#include "util_script.h"
103
104/* ### this isn't set by configure? does anybody set this? */
105#ifdef HAVE_UTIME_H
106#include <utime.h>
107#endif
108
109/*
110 * data structures and related constants
111 */
112
/* prefix used in this module's log and error messages */
#define MODNAME                 "mod_mime_magic"
/* set to non-zero to compile in the MIME_MAGIC_DEBUG logging blocks */
#define MIME_MAGIC_DEBUG        0

/* fallback MIME types */
#define MIME_BINARY_UNKNOWN     "application/octet-stream"
#define MIME_TEXT_UNKNOWN       "text/plain"

/* size of the formatting buffer used for printf-style result fragments */
#define MAXMIMESTRING           256

/*
 * Buffer sizing.
 * HOWMANY must be at least 4096 to make gzip -dcq work.
 * SMALL_HOWMANY limits how much work we do to figure out text files.
 */
#define HOWMANY                 4096
#define SMALL_HOWMANY           1024

/* field widths inside struct magic */
#define MAXDESC                 50      /* max length of text description */
#define MAXstring               64      /* max length of "string" types */
127
128struct magic {
129    struct magic *next;     /* link to next entry */
130    int lineno;             /* line number from magic file */
131
132    short flag;
133#define INDIR  1            /* if '>(...)' appears,  */
134#define UNSIGNED 2          /* comparison is unsigned */
135    short cont_level;       /* level of ">" */
136    struct {
137        char type;          /* byte short long */
138        long offset;        /* offset from indirection */
139    } in;
140    long offset;            /* offset to magic number */
141    unsigned char reln;     /* relation (0=eq, '>'=gt, etc) */
142    char type;              /* int, short, long or string. */
143    char vallen;            /* length of string value, if any */
144#define BYTE      1
145#define SHORT     2
146#define LONG      4
147#define STRING    5
148#define DATE      6
149#define BESHORT   7
150#define BELONG    8
151#define BEDATE    9
152#define LESHORT  10
153#define LELONG   11
154#define LEDATE   12
155    union VALUETYPE {
156        unsigned char b;
157        unsigned short h;
158        unsigned long l;
159        char s[MAXstring];
160        unsigned char hs[2];   /* 2 bytes of a fixed-endian "short" */
161        unsigned char hl[4];   /* 2 bytes of a fixed-endian "long" */
162    } value;                   /* either number or string */
163    unsigned long mask;        /* mask before comparison with value */
164    char nospflag;             /* supress space character */
165
166    /* NOTE: this string is suspected of overrunning - find it! */
167    char desc[MAXDESC];        /* description */
168};
169
170/*
171 * data structures for tar file recognition
172 * --------------------------------------------------------------------------
173 * Header file for public domain tar (tape archive) program.
174 *
175 * @(#)tar.h 1.20 86/10/29    Public Domain. Created 25 August 1985 by John
176 * Gilmore, ihnp4!hoptoad!gnu.
177 *
178 * Header block on tape.
179 *
180 * I'm going to use traditional DP naming conventions here. A "block" is a big
181 * chunk of stuff that we do I/O on. A "record" is a piece of info that we
182 * care about. Typically many "record"s fit into a "block".
183 */
/* sizes of a tar header block and of its fixed-width name fields */
#define RECORDSIZE  512
#define NAMSIZ      100
#define TUNMLEN     32
#define TGNMLEN     32

/*
 * One tar block, viewable either as RECORDSIZE raw bytes (charptr) or as
 * the fixed-layout header fields.
 */
union record {
    char charptr[RECORDSIZE];       /* raw view of the whole block */
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];              /* holds TMAGIC, see below */
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;                       /* structured view of the block */
};

/* The magic field is filled with this if uname and gname are valid. */
#define TMAGIC      "ustar  "       /* 7 chars and a null */
211
212/*
213 * file-function prototypes
214 */
215static int ascmagic(request_rec *, unsigned char *, apr_size_t);
216static int is_tar(unsigned char *, apr_size_t);
217static int softmagic(request_rec *, unsigned char *, apr_size_t);
218static int tryit(request_rec *, unsigned char *, apr_size_t, int);
219static int zmagic(request_rec *, unsigned char *, apr_size_t);
220
221static int getvalue(server_rec *, struct magic *, char **);
222static int hextoint(int);
223static char *getstr(server_rec *, char *, char *, int, int *);
224static int parse(server_rec *, apr_pool_t *p, char *, int);
225
226static int match(request_rec *, unsigned char *, apr_size_t);
227static int mget(request_rec *, union VALUETYPE *, unsigned char *,
228                struct magic *, apr_size_t);
229static int mcheck(request_rec *, union VALUETYPE *, struct magic *);
230static void mprint(request_rec *, union VALUETYPE *, struct magic *);
231
232static int uncompress(request_rec *, int,
233                      unsigned char **, apr_size_t);
234static long from_oct(int, char *);
235static int fsmagic(request_rec *r, const char *fn);
236
237/*
238 * includes for ASCII substring recognition formerly "names.h" in file
239 * command
240 *
241 * Original notes: names and types used by ascmagic in file(1). These tokens are
242 * here because they can appear anywhere in the first HOWMANY bytes, while
243 * tokens in /etc/magic must appear at fixed offsets into the file. Don't
244 * make HOWMANY too high unless you have a very fast CPU.
245 */
246
/*
 * The L_* values are indices into the types[] array below: keep the two
 * in sync!  HTML was inserted first because this is a web server module now.
 */
#define L_HTML    0   /* HTML */
#define L_C       1   /* first and foremost on UNIX */
#define L_FORT    2   /* the oldest one */
#define L_MAKE    3   /* Makefiles */
#define L_PLI     4   /* PL/1 */
#define L_MACH    5   /* some kinda assembler */
#define L_ENG     6   /* English */
#define L_PAS     7   /* Pascal */
#define L_MAIL    8   /* Electronic mail */
#define L_NEWS    9   /* Usenet Netnews */

/*
 * MIME type for each L_* index.  The trailing comments give the text the
 * file(1) command used for the same slot.  The list is NULL-terminated.
 */
static const char *types[] = {
    "text/html",            /* L_HTML: HTML */
    "text/plain",           /* L_C:    "c program text" */
    "text/plain",           /* L_FORT: "fortran program text" */
    "text/plain",           /* L_MAKE: "make commands text" */
    "text/plain",           /* L_PLI:  "pl/1 program text" */
    "text/plain",           /* L_MACH: "assembler program text" */
    "text/plain",           /* L_ENG:  "English text" */
    "text/plain",           /* L_PAS:  "pascal program text" */
    "message/rfc822",       /* L_MAIL: "mail text" */
    "message/news",         /* L_NEWS: "news text" */
    "application/binary",   /* "can't happen error on names.h/types" */
    NULL
};
275
276static const struct names {
277    const char *name;
278    short type;
279} names[] = {
280
281    /* These must be sorted by eye for optimal hit rate */
282    /* Add to this list only after substantial meditation */
283    {
284        "<html>", L_HTML
285    },
286    {
287        "<HTML>", L_HTML
288    },
289    {
290        "<head>", L_HTML
291    },
292    {
293        "<HEAD>", L_HTML
294    },
295    {
296        "<title>", L_HTML
297    },
298    {
299        "<TITLE>", L_HTML
300    },
301    {
302        "<h1>", L_HTML
303    },
304    {
305        "<H1>", L_HTML
306    },
307    {
308        "<!--", L_HTML
309    },
310    {
311        "<!DOCTYPE HTML", L_HTML
312    },
313    {
314        "/*", L_C
315    },               /* must precede "The", "the", etc. */
316    {
317        "#include", L_C
318    },
319    {
320        "char", L_C
321    },
322    {
323        "The", L_ENG
324    },
325    {
326        "the", L_ENG
327    },
328    {
329        "double", L_C
330    },
331    {
332        "extern", L_C
333    },
334    {
335        "float", L_C
336    },
337    {
338        "real", L_C
339    },
340    {
341        "struct", L_C
342    },
343    {
344        "union", L_C
345    },
346    {
347        "CFLAGS", L_MAKE
348    },
349    {
350        "LDFLAGS", L_MAKE
351    },
352    {
353        "all:", L_MAKE
354    },
355    {
356        ".PRECIOUS", L_MAKE
357    },
358    /*
359     * Too many files of text have these words in them.  Find another way to
360     * recognize Fortrash.
361     */
362#ifdef    NOTDEF
363    {
364        "subroutine", L_FORT
365    },
366    {
367        "function", L_FORT
368    },
369    {
370        "block", L_FORT
371    },
372    {
373        "common", L_FORT
374    },
375    {
376        "dimension", L_FORT
377    },
378    {
379        "integer", L_FORT
380    },
381    {
382        "data", L_FORT
383    },
384#endif /* NOTDEF */
385    {
386        ".ascii", L_MACH
387    },
388    {
389        ".asciiz", L_MACH
390    },
391    {
392        ".byte", L_MACH
393    },
394    {
395        ".even", L_MACH
396    },
397    {
398        ".globl", L_MACH
399    },
400    {
401        "clr", L_MACH
402    },
403    {
404        "(input,", L_PAS
405    },
406    {
407        "dcl", L_PLI
408    },
409    {
410        "Received:", L_MAIL
411    },
412    {
413        ">From", L_MAIL
414    },
415    {
416        "Return-Path:", L_MAIL
417    },
418    {
419        "Cc:", L_MAIL
420    },
421    {
422        "Newsgroups:", L_NEWS
423    },
424    {
425        "Path:", L_NEWS
426    },
427    {
428        "Organization:", L_NEWS
429    },
430    {
431        NULL, 0
432    }
433};
434
435#define NNAMES ((sizeof(names)/sizeof(struct names)) - 1)
436
437/*
438 * Result String List (RSL)
439 *
440 * The file(1) command prints its output.  Instead, we store the various
441 * "printed" strings in a list (allocating memory as we go) and concatenate
442 * them at the end when we finally know how much space they'll need.
443 */
444
/* one node of the result string list; nodes are linked through `next` */
struct magic_rsl_s {
    const char *str;            /* string, possibly a fragment */
    struct magic_rsl_s *next;   /* pointer to next fragment */
};
typedef struct magic_rsl_s magic_rsl;
449
450/*
451 * Apache module configuration structures
452 */
453
/*
 * Per-server configuration.
 * `magicfile` is filled in by the MimeMagicFile directive handler; `magic`
 * and `last` are the two ends of the list of parsed magic entries.
 */
typedef struct {
    const char   *magicfile;    /* where magic be found */
    struct magic *magic;        /* head of magic config list */
    struct magic *last;         /* end of the list */
} magic_server_config_rec;
460
461/* per-request info */
462typedef struct {
463    magic_rsl *head;          /* result string list */
464    magic_rsl *tail;
465    unsigned suf_recursion;   /* recursion depth in suffix check */
466} magic_req_rec;
467
468/*
469 * configuration functions - called by Apache API routines
470 */
471
472module AP_MODULE_DECLARE_DATA mime_magic_module;
473
474static void *create_magic_server_config(apr_pool_t *p, server_rec *d)
475{
476    /* allocate the config - use pcalloc because it needs to be zeroed */
477    return apr_pcalloc(p, sizeof(magic_server_config_rec));
478}
479
480static void *merge_magic_server_config(apr_pool_t *p, void *basev, void *addv)
481{
482    magic_server_config_rec *base = (magic_server_config_rec *) basev;
483    magic_server_config_rec *add = (magic_server_config_rec *) addv;
484    magic_server_config_rec *new = (magic_server_config_rec *)
485                            apr_palloc(p, sizeof(magic_server_config_rec));
486
487    new->magicfile = add->magicfile ? add->magicfile : base->magicfile;
488    new->magic = NULL;
489    new->last = NULL;
490    return new;
491}
492
493static const char *set_magicfile(cmd_parms *cmd, void *dummy, const char *arg)
494{
495    magic_server_config_rec *conf = (magic_server_config_rec *)
496    ap_get_module_config(cmd->server->module_config,
497                      &mime_magic_module);
498
499    if (!conf) {
500        return MODNAME ": server structure not allocated";
501    }
502    conf->magicfile = arg;
503    return NULL;
504}
505
506/*
507 * configuration file commands - exported to Apache API
508 */
509
510static const command_rec mime_magic_cmds[] =
511{
512    AP_INIT_TAKE1("MimeMagicFile", set_magicfile, NULL, RSRC_CONF,
513     "Path to MIME Magic file (in file(1) format)"),
514    {NULL}
515};
516
517/*
518 * RSL (result string list) processing routines
519 *
520 * These collect strings that would have been printed in fragments by file(1)
521 * into a list of magic_rsl structures with the strings. When complete,
522 * they're concatenated together to become the MIME content and encoding
523 * types.
524 *
 * Return value conventions for these functions:
 *   - functions which return int: failure = -1, other = result
 *   - functions which return pointers: failure = 0 (NULL), other = result
528 */
529
530/* allocate a per-request structure and put it in the request record */
531static magic_req_rec *magic_set_config(request_rec *r)
532{
533    magic_req_rec *req_dat = (magic_req_rec *) apr_palloc(r->pool,
534                                                      sizeof(magic_req_rec));
535
536    req_dat->head = req_dat->tail = (magic_rsl *) NULL;
537    ap_set_module_config(r->request_config, &mime_magic_module, req_dat);
538    return req_dat;
539}
540
541/* add a string to the result string list for this request */
542/* it is the responsibility of the caller to allocate "str" */
543static int magic_rsl_add(request_rec *r, const char *str)
544{
545    magic_req_rec *req_dat = (magic_req_rec *)
546                    ap_get_module_config(r->request_config, &mime_magic_module);
547    magic_rsl *rsl;
548
549    /* make sure we have a list to put it in */
550    if (!req_dat) {
551        ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_EINVAL, r, APLOGNO(01507)
552                    MODNAME ": request config should not be NULL");
553        if (!(req_dat = magic_set_config(r))) {
554            /* failure */
555            return -1;
556        }
557    }
558
559    /* allocate the list entry */
560    rsl = (magic_rsl *) apr_palloc(r->pool, sizeof(magic_rsl));
561
562    /* fill it */
563    rsl->str = str;
564    rsl->next = (magic_rsl *) NULL;
565
566    /* append to the list */
567    if (req_dat->head && req_dat->tail) {
568        req_dat->tail->next = rsl;
569        req_dat->tail = rsl;
570    }
571    else {
572        req_dat->head = req_dat->tail = rsl;
573    }
574
575    /* success */
576    return 0;
577}
578
579/* RSL hook for puts-type functions */
580static int magic_rsl_puts(request_rec *r, const char *str)
581{
582    return magic_rsl_add(r, str);
583}
584
585/* RSL hook for printf-type functions */
586static int magic_rsl_printf(request_rec *r, char *str,...)
587{
588    va_list ap;
589
590    char buf[MAXMIMESTRING];
591
592    /* assemble the string into the buffer */
593    va_start(ap, str);
594    apr_vsnprintf(buf, sizeof(buf), str, ap);
595    va_end(ap);
596
597    /* add the buffer to the list */
598    return magic_rsl_add(r, apr_pstrdup(r->pool, buf));
599}
600
601/* RSL hook for putchar-type functions */
602static int magic_rsl_putchar(request_rec *r, char c)
603{
604    char str[2];
605
606    /* high overhead for 1 char - just hope they don't do this much */
607    str[0] = c;
608    str[1] = '\0';
609    return magic_rsl_add(r, str);
610}
611
612/* allocate and copy a contiguous string from a result string list */
613static char *rsl_strdup(request_rec *r, int start_frag, int start_pos, int len)
614{
615    char *result;       /* return value */
616    int cur_frag,       /* current fragment number/counter */
617        cur_pos,        /* current position within fragment */
618        res_pos;        /* position in result string */
619    magic_rsl *frag;    /* list-traversal pointer */
620    magic_req_rec *req_dat = (magic_req_rec *)
621                    ap_get_module_config(r->request_config, &mime_magic_module);
622
623    /* allocate the result string */
624    result = (char *) apr_palloc(r->pool, len + 1);
625
626    /* loop through and collect the string */
627    res_pos = 0;
628    for (frag = req_dat->head, cur_frag = 0;
629         frag->next;
630         frag = frag->next, cur_frag++) {
631        /* loop to the first fragment */
632        if (cur_frag < start_frag)
633            continue;
634
635        /* loop through and collect chars */
636        for (cur_pos = (cur_frag == start_frag) ? start_pos : 0;
637             frag->str[cur_pos];
638             cur_pos++) {
639            if (cur_frag >= start_frag
640                && cur_pos >= start_pos
641                && res_pos <= len) {
642                result[res_pos++] = frag->str[cur_pos];
643                if (res_pos > len) {
644                    break;
645                }
646            }
647        }
648    }
649
650    /* clean up and return */
651    result[res_pos] = 0;
652#if MIME_MAGIC_DEBUG
653    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01508)
654             MODNAME ": rsl_strdup() %d chars: %s", res_pos - 1, result);
655#endif
656    return result;
657}
658
/*
 * States for the state-machine algorithm in magic_rsl_to_request().
 * The enumerators are listed in the order the scanner moves through them.
 */
typedef enum {
    rsl_leading_space,  /* before the first non-space character */
    rsl_type,           /* inside the MIME type, no '/' seen yet */
    rsl_subtype,        /* inside the MIME type, after the '/' */
    rsl_separator,      /* whitespace after the MIME type */
    rsl_encoding        /* inside the content encoding */
} rsl_states;
663
664/* process the RSL and set the MIME info in the request record */
665static int magic_rsl_to_request(request_rec *r)
666{
667    int cur_frag,         /* current fragment number/counter */
668        cur_pos,          /* current position within fragment */
669        type_frag,        /* content type starting point: fragment */
670        type_pos,         /* content type starting point: position */
671        type_len,         /* content type length */
672        encoding_frag,    /* content encoding starting point: fragment */
673        encoding_pos,     /* content encoding starting point: position */
674        encoding_len;     /* content encoding length */
675
676    char *tmp;
677    magic_rsl *frag;      /* list-traversal pointer */
678    rsl_states state;
679
680    magic_req_rec *req_dat = (magic_req_rec *)
681                    ap_get_module_config(r->request_config, &mime_magic_module);
682
683    /* check if we have a result */
684    if (!req_dat || !req_dat->head) {
685        /* empty - no match, we defer to other Apache modules */
686        return DECLINED;
687    }
688
689    /* start searching for the type and encoding */
690    state = rsl_leading_space;
691    type_frag = type_pos = type_len = 0;
692    encoding_frag = encoding_pos = encoding_len = 0;
693    for (frag = req_dat->head, cur_frag = 0;
694         frag && frag->next;
695         frag = frag->next, cur_frag++) {
696        /* loop through the characters in the fragment */
697        for (cur_pos = 0; frag->str[cur_pos]; cur_pos++) {
698            if (apr_isspace(frag->str[cur_pos])) {
699                /* process whitespace actions for each state */
700                if (state == rsl_leading_space) {
701                    /* eat whitespace in this state */
702                    continue;
703                }
704                else if (state == rsl_type) {
705                    /* whitespace: type has no slash! */
706                    return DECLINED;
707                }
708                else if (state == rsl_subtype) {
709                    /* whitespace: end of MIME type */
710                    state++;
711                    continue;
712                }
713                else if (state == rsl_separator) {
714                    /* eat whitespace in this state */
715                    continue;
716                }
717                else if (state == rsl_encoding) {
718                    /* whitespace: end of MIME encoding */
719                    /* we're done */
720                    frag = req_dat->tail;
721                    break;
722                }
723                else {
724                    /* should not be possible */
725                    /* abandon malfunctioning module */
726                    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01509)
727                                MODNAME ": bad state %d (ws)", state);
728                    return DECLINED;
729                }
730                /* NOTREACHED */
731            }
732            else if (state == rsl_type &&
733                     frag->str[cur_pos] == '/') {
734                /* copy the char and go to rsl_subtype state */
735                type_len++;
736                state++;
737            }
738            else {
739                /* process non-space actions for each state */
740                if (state == rsl_leading_space) {
741                    /* non-space: begin MIME type */
742                    state++;
743                    type_frag = cur_frag;
744                    type_pos = cur_pos;
745                    type_len = 1;
746                    continue;
747                }
748                else if (state == rsl_type ||
749                         state == rsl_subtype) {
750                    /* non-space: adds to type */
751                    type_len++;
752                    continue;
753                }
754                else if (state == rsl_separator) {
755                    /* non-space: begin MIME encoding */
756                    state++;
757                    encoding_frag = cur_frag;
758                    encoding_pos = cur_pos;
759                    encoding_len = 1;
760                    continue;
761                }
762                else if (state == rsl_encoding) {
763                    /* non-space: adds to encoding */
764                    encoding_len++;
765                    continue;
766                }
767                else {
768                    /* should not be possible */
769                    /* abandon malfunctioning module */
770                    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01510)
771                                MODNAME ": bad state %d (ns)", state);
772                    return DECLINED;
773                }
774                /* NOTREACHED */
775            }
776            /* NOTREACHED */
777        }
778    }
779
780    /* if we ended prior to state rsl_subtype, we had incomplete info */
781    if (state != rsl_subtype && state != rsl_separator &&
782        state != rsl_encoding) {
783        /* defer to other modules */
784        return DECLINED;
785    }
786
787    /* save the info in the request record */
788    tmp = rsl_strdup(r, type_frag, type_pos, type_len);
789    /* XXX: this could be done at config time I'm sure... but I'm
790     * confused by all this magic_rsl stuff. -djg */
791    ap_content_type_tolower(tmp);
792    ap_set_content_type(r, tmp);
793
794    if (state == rsl_encoding) {
795        tmp = rsl_strdup(r, encoding_frag,
796                                         encoding_pos, encoding_len);
797        /* XXX: this could be done at config time I'm sure... but I'm
798         * confused by all this magic_rsl stuff. -djg */
799        ap_str_tolower(tmp);
800        r->content_encoding = tmp;
801    }
802
803    /* detect memory allocation or other errors */
804    if (!r->content_type ||
805        (state == rsl_encoding && !r->content_encoding)) {
806        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01511)
807                      MODNAME ": unexpected state %d; could be caused by bad "
808                      "data in magic file",
809                      state);
810        return HTTP_INTERNAL_SERVER_ERROR;
811    }
812
813    /* success! */
814    return OK;
815}
816
817/*
818 * magic_process - process input file r        Apache API request record
819 * (formerly called "process" in file command, prefix added for clarity) Opens
820 * the file and reads a fixed-size buffer to begin processing the contents.
821 */
822static int magic_process(request_rec *r)
823{
824    apr_file_t *fd = NULL;
825    unsigned char buf[HOWMANY + 1];  /* one extra for terminating '\0' */
826    apr_size_t nbytes = 0;           /* number of bytes read from a datafile */
827    int result;
828
829    /*
830     * first try judging the file based on its filesystem status
831     */
832    switch ((result = fsmagic(r, r->filename))) {
833    case DONE:
834        magic_rsl_putchar(r, '\n');
835        return OK;
836    case OK:
837        break;
838    default:
839        /* fatal error, bail out */
840        return result;
841    }
842
843    if (apr_file_open(&fd, r->filename, APR_READ, APR_OS_DEFAULT, r->pool) != APR_SUCCESS) {
844        /* We can't open it, but we were able to stat it. */
845        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01512)
846                    MODNAME ": can't read `%s'", r->filename);
847        /* let some other handler decide what the problem is */
848        return DECLINED;
849    }
850
851    /*
852     * try looking at the first HOWMANY bytes
853     */
854    nbytes = sizeof(buf) - 1;
855    if ((result = apr_file_read(fd, (char *) buf, &nbytes)) != APR_SUCCESS) {
856        ap_log_rerror(APLOG_MARK, APLOG_ERR, result, r, APLOGNO(01513)
857                    MODNAME ": read failed: %s", r->filename);
858        return HTTP_INTERNAL_SERVER_ERROR;
859    }
860
861    if (nbytes == 0) {
862        return DECLINED;
863    }
864    else {
865        buf[nbytes++] = '\0';  /* null-terminate it */
866        result = tryit(r, buf, nbytes, 1);
867        if (result != OK) {
868            return result;
869        }
870    }
871
872    (void) apr_file_close(fd);
873    (void) magic_rsl_putchar(r, '\n');
874
875    return OK;
876}
877
878
879static int tryit(request_rec *r, unsigned char *buf, apr_size_t nb,
880                 int checkzmagic)
881{
882    /*
883     * Try compression stuff
884     */
885    if (checkzmagic == 1) {
886        if (zmagic(r, buf, nb) == 1)
887            return OK;
888    }
889
890    /*
891     * try tests in /etc/magic (or surrogate magic file)
892     */
893    if (softmagic(r, buf, nb) == 1)
894        return OK;
895
896    /*
897     * try known keywords, check for ascii-ness too.
898     */
899    if (ascmagic(r, buf, nb) == 1)
900        return OK;
901
902    /*
903     * abandon hope, all ye who remain here
904     */
905    return DECLINED;
906}
907
/* advance the caller's pointer "l" past any whitespace it points at */
#define EATAB                       \
    {                               \
        while (apr_isspace(*l))     \
            ++l;                    \
    }
909
/*
 * apprentice - load configuration from the magic file
 *   s   server record whose module configuration names the magic file
 *   p   pool used to resolve the path and open the file
 */
914static int apprentice(server_rec *s, apr_pool_t *p)
915{
916    apr_file_t *f = NULL;
917    apr_status_t result;
918    char line[BUFSIZ + 1];
919    int errs = 0;
920    int lineno;
921#if MIME_MAGIC_DEBUG
922    int rule = 0;
923    struct magic *m, *prevm;
924#endif
925    magic_server_config_rec *conf = (magic_server_config_rec *)
926                    ap_get_module_config(s->module_config, &mime_magic_module);
927    const char *fname = ap_server_root_relative(p, conf->magicfile);
928
929    if (!fname) {
930        ap_log_error(APLOG_MARK, APLOG_ERR, APR_EBADPATH, s, APLOGNO(01514)
931                     MODNAME ": Invalid magic file path %s", conf->magicfile);
932        return -1;
933    }
934    if ((result = apr_file_open(&f, fname, APR_READ | APR_BUFFERED,
935                                APR_OS_DEFAULT, p)) != APR_SUCCESS) {
936        ap_log_error(APLOG_MARK, APLOG_ERR, result, s, APLOGNO(01515)
937                     MODNAME ": can't read magic file %s", fname);
938        return -1;
939    }
940
941    /* set up the magic list (empty) */
942    conf->magic = conf->last = NULL;
943
944    /* parse it */
945    for (lineno = 1; apr_file_gets(line, BUFSIZ, f) == APR_SUCCESS; lineno++) {
946        int ws_offset;
947        char *last = line + strlen(line) - 1; /* guaranteed that len >= 1 since an
948                                               * "empty" line contains a '\n'
949                                               */
950
951        /* delete newline and any other trailing whitespace */
952        while (last >= line
953               && apr_isspace(*last)) {
954            *last = '\0';
955            --last;
956        }
957
958        /* skip leading whitespace */
959        ws_offset = 0;
960        while (line[ws_offset] && apr_isspace(line[ws_offset])) {
961            ws_offset++;
962        }
963
964        /* skip blank lines */
965        if (line[ws_offset] == 0) {
966            continue;
967        }
968
969        /* comment, do not parse */
970        if (line[ws_offset] == '#')
971            continue;
972
973#if MIME_MAGIC_DEBUG
974        /* if we get here, we're going to use it so count it */
975        rule++;
976#endif
977
978        /* parse it */
979        if (parse(s, p, line + ws_offset, lineno) != 0)
980            ++errs;
981    }
982
983    (void) apr_file_close(f);
984
985#if MIME_MAGIC_DEBUG
986    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01516)
987                MODNAME ": apprentice conf=%x file=%s m=%s m->next=%s last=%s",
988                conf,
989                conf->magicfile ? conf->magicfile : "NULL",
990                conf->magic ? "set" : "NULL",
991                (conf->magic && conf->magic->next) ? "set" : "NULL",
992                conf->last ? "set" : "NULL");
993    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01517)
994                MODNAME ": apprentice read %d lines, %d rules, %d errors",
995                lineno, rule, errs);
996#endif
997
998#if MIME_MAGIC_DEBUG
999    prevm = 0;
1000    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01518)
1001                MODNAME ": apprentice test");
1002    for (m = conf->magic; m; m = m->next) {
1003        if (apr_isprint((((unsigned long) m) >> 24) & 255) &&
1004            apr_isprint((((unsigned long) m) >> 16) & 255) &&
1005            apr_isprint((((unsigned long) m) >> 8) & 255) &&
1006            apr_isprint(((unsigned long) m) & 255)) {
1007            ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01519)
1008                        MODNAME ": apprentice: POINTER CLOBBERED! "
1009                        "m=\"%c%c%c%c\" line=%d",
1010                        (((unsigned long) m) >> 24) & 255,
1011                        (((unsigned long) m) >> 16) & 255,
1012                        (((unsigned long) m) >> 8) & 255,
1013                        ((unsigned long) m) & 255,
1014                        prevm ? prevm->lineno : -1);
1015            break;
1016        }
1017        prevm = m;
1018    }
1019#endif
1020
1021    return (errs ? -1 : 0);
1022}
1023
1024/*
1025 * extend the sign bit if the comparison is to be signed
1026 */
1027static unsigned long signextend(server_rec *s, struct magic *m, unsigned long v)
1028{
1029    if (!(m->flag & UNSIGNED))
1030        switch (m->type) {
1031            /*
1032             * Do not remove the casts below.  They are vital. When later
1033             * compared with the data, the sign extension must have happened.
1034             */
1035        case BYTE:
1036            v = (char) v;
1037            break;
1038        case SHORT:
1039        case BESHORT:
1040        case LESHORT:
1041            v = (short) v;
1042            break;
1043        case DATE:
1044        case BEDATE:
1045        case LEDATE:
1046        case LONG:
1047        case BELONG:
1048        case LELONG:
1049            v = (long) v;
1050            break;
1051        case STRING:
1052            break;
1053        default:
1054            ap_log_error(APLOG_MARK, APLOG_ERR, 0, s, APLOGNO(01520)
1055                        MODNAME ": can't happen: m->type=%d", m->type);
1056            return -1;
1057        }
1058    return v;
1059}
1060
1061/*
1062 * parse one line from magic file, put into magic[index++] if valid
1063 */
1064static int parse(server_rec *serv, apr_pool_t *p, char *l, int lineno)
1065{
1066    struct magic *m;
1067    char *t, *s;
1068    magic_server_config_rec *conf = (magic_server_config_rec *)
1069                    ap_get_module_config(serv->module_config, &mime_magic_module);
1070
1071    /* allocate magic structure entry */
1072    m = (struct magic *) apr_pcalloc(p, sizeof(struct magic));
1073
1074    /* append to linked list */
1075    m->next = NULL;
1076    if (!conf->magic || !conf->last) {
1077        conf->magic = conf->last = m;
1078    }
1079    else {
1080        conf->last->next = m;
1081        conf->last = m;
1082    }
1083
1084    /* set values in magic structure */
1085    m->flag = 0;
1086    m->cont_level = 0;
1087    m->lineno = lineno;
1088
1089    while (*l == '>') {
1090        ++l;  /* step over */
1091        m->cont_level++;
1092    }
1093
1094    if (m->cont_level != 0 && *l == '(') {
1095        ++l;  /* step over */
1096        m->flag |= INDIR;
1097    }
1098
1099    /* get offset, then skip over it */
1100    m->offset = (int) strtol(l, &t, 0);
1101    if (l == t) {
1102        ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01521)
1103                    MODNAME ": offset %s invalid", l);
1104    }
1105    l = t;
1106
1107    if (m->flag & INDIR) {
1108        m->in.type = LONG;
1109        m->in.offset = 0;
1110        /*
1111         * read [.lbs][+-]nnnnn)
1112         */
1113        if (*l == '.') {
1114            switch (*++l) {
1115            case 'l':
1116                m->in.type = LONG;
1117                break;
1118            case 's':
1119                m->in.type = SHORT;
1120                break;
1121            case 'b':
1122                m->in.type = BYTE;
1123                break;
1124            default:
1125                ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01522)
1126                        MODNAME ": indirect offset type %c invalid", *l);
1127                break;
1128            }
1129            l++;
1130        }
1131        s = l;
1132        if (*l == '+' || *l == '-')
1133            l++;
1134        if (apr_isdigit((unsigned char) *l)) {
1135            m->in.offset = strtol(l, &t, 0);
1136            if (*s == '-')
1137                m->in.offset = -m->in.offset;
1138        }
1139        else
1140            t = l;
1141        if (*t++ != ')') {
1142            ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01523)
1143                        MODNAME ": missing ')' in indirect offset");
1144        }
1145        l = t;
1146    }
1147
1148
1149    while (apr_isdigit((unsigned char) *l))
1150        ++l;
1151    EATAB;
1152
1153#define NBYTE           4
1154#define NSHORT          5
1155#define NLONG           4
1156#define NSTRING         6
1157#define NDATE           4
1158#define NBESHORT        7
1159#define NBELONG         6
1160#define NBEDATE         6
1161#define NLESHORT        7
1162#define NLELONG         6
1163#define NLEDATE         6
1164
1165    if (*l == 'u') {
1166        ++l;
1167        m->flag |= UNSIGNED;
1168    }
1169
1170    /* get type, skip it */
1171    if (strncmp(l, "byte", NBYTE) == 0) {
1172        m->type = BYTE;
1173        l += NBYTE;
1174    }
1175    else if (strncmp(l, "short", NSHORT) == 0) {
1176        m->type = SHORT;
1177        l += NSHORT;
1178    }
1179    else if (strncmp(l, "long", NLONG) == 0) {
1180        m->type = LONG;
1181        l += NLONG;
1182    }
1183    else if (strncmp(l, "string", NSTRING) == 0) {
1184        m->type = STRING;
1185        l += NSTRING;
1186    }
1187    else if (strncmp(l, "date", NDATE) == 0) {
1188        m->type = DATE;
1189        l += NDATE;
1190    }
1191    else if (strncmp(l, "beshort", NBESHORT) == 0) {
1192        m->type = BESHORT;
1193        l += NBESHORT;
1194    }
1195    else if (strncmp(l, "belong", NBELONG) == 0) {
1196        m->type = BELONG;
1197        l += NBELONG;
1198    }
1199    else if (strncmp(l, "bedate", NBEDATE) == 0) {
1200        m->type = BEDATE;
1201        l += NBEDATE;
1202    }
1203    else if (strncmp(l, "leshort", NLESHORT) == 0) {
1204        m->type = LESHORT;
1205        l += NLESHORT;
1206    }
1207    else if (strncmp(l, "lelong", NLELONG) == 0) {
1208        m->type = LELONG;
1209        l += NLELONG;
1210    }
1211    else if (strncmp(l, "ledate", NLEDATE) == 0) {
1212        m->type = LEDATE;
1213        l += NLEDATE;
1214    }
1215    else {
1216        ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01524)
1217                    MODNAME ": type %s invalid", l);
1218        return -1;
1219    }
1220    /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
1221    if (*l == '&') {
1222        ++l;
1223        m->mask = signextend(serv, m, strtol(l, &l, 0));
1224    }
1225    else
1226        m->mask = ~0L;
1227    EATAB;
1228
1229    switch (*l) {
1230    case '>':
1231    case '<':
1232        /* Old-style anding: "0 byte &0x80 dynamically linked" */
1233    case '&':
1234    case '^':
1235    case '=':
1236        m->reln = *l;
1237        ++l;
1238        break;
1239    case '!':
1240        if (m->type != STRING) {
1241            m->reln = *l;
1242            ++l;
1243            break;
1244        }
1245        /* FALL THROUGH */
1246    default:
1247        if (*l == 'x' && apr_isspace(l[1])) {
1248            m->reln = *l;
1249            ++l;
1250            goto GetDesc;  /* Bill The Cat */
1251        }
1252        m->reln = '=';
1253        break;
1254    }
1255    EATAB;
1256
1257    if (getvalue(serv, m, &l))
1258        return -1;
1259    /*
1260     * now get last part - the description
1261     */
1262  GetDesc:
1263    EATAB;
1264    if (l[0] == '\b') {
1265        ++l;
1266        m->nospflag = 1;
1267    }
1268    else if ((l[0] == '\\') && (l[1] == 'b')) {
1269        ++l;
1270        ++l;
1271        m->nospflag = 1;
1272    }
1273    else
1274        m->nospflag = 0;
1275    apr_cpystrn(m->desc, l, sizeof(m->desc));
1276
1277#if MIME_MAGIC_DEBUG
1278    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, serv, APLOGNO(01525)
1279                MODNAME ": parse line=%d m=%x next=%x cont=%d desc=%s",
1280                lineno, m, m->next, m->cont_level, m->desc);
1281#endif /* MIME_MAGIC_DEBUG */
1282
1283    return 0;
1284}
1285
1286/*
1287 * Read a numeric value from a pointer, into the value union of a magic
1288 * pointer, according to the magic type.  Update the string pointer to point
1289 * just after the number read.  Return 0 for success, non-zero for failure.
1290 */
1291static int getvalue(server_rec *s, struct magic *m, char **p)
1292{
1293    int slen;
1294
1295    if (m->type == STRING) {
1296        *p = getstr(s, *p, m->value.s, sizeof(m->value.s), &slen);
1297        m->vallen = slen;
1298    }
1299    else if (m->reln != 'x')
1300        m->value.l = signextend(s, m, strtol(*p, p, 0));
1301    return 0;
1302}
1303
1304/*
1305 * Convert a string containing C character escapes.  Stop at an unescaped
1306 * space or tab. Copy the converted version to "p", returning its length in
1307 * *slen. Return updated scan pointer as function result.
1308 */
1309static char *getstr(server_rec *serv, register char *s, register char *p,
1310                    int plen, int *slen)
1311{
1312    char *origs = s, *origp = p;
1313    char *pmax = p + plen - 1;
1314    register int c;
1315    register int val;
1316
1317    while ((c = *s++) != '\0') {
1318        if (apr_isspace(c))
1319            break;
1320        if (p >= pmax) {
1321            ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01526)
1322                        MODNAME ": string too long: %s", origs);
1323            break;
1324        }
1325        if (c == '\\') {
1326            switch (c = *s++) {
1327
1328            case '\0':
1329                goto out;
1330
1331            default:
1332                *p++ = (char) c;
1333                break;
1334
1335            case 'n':
1336                *p++ = '\n';
1337                break;
1338
1339            case 'r':
1340                *p++ = '\r';
1341                break;
1342
1343            case 'b':
1344                *p++ = '\b';
1345                break;
1346
1347            case 't':
1348                *p++ = '\t';
1349                break;
1350
1351            case 'f':
1352                *p++ = '\f';
1353                break;
1354
1355            case 'v':
1356                *p++ = '\v';
1357                break;
1358
1359                /* \ and up to 3 octal digits */
1360            case '0':
1361            case '1':
1362            case '2':
1363            case '3':
1364            case '4':
1365            case '5':
1366            case '6':
1367            case '7':
1368                val = c - '0';
1369                c = *s++;  /* try for 2 */
1370                if (c >= '0' && c <= '7') {
1371                    val = (val << 3) | (c - '0');
1372                    c = *s++;  /* try for 3 */
1373                    if (c >= '0' && c <= '7')
1374                        val = (val << 3) | (c - '0');
1375                    else
1376                        --s;
1377                }
1378                else
1379                    --s;
1380                *p++ = (char) val;
1381                break;
1382
1383                /* \x and up to 3 hex digits */
1384            case 'x':
1385                val = 'x';            /* Default if no digits */
1386                c = hextoint(*s++);   /* Get next char */
1387                if (c >= 0) {
1388                    val = c;
1389                    c = hextoint(*s++);
1390                    if (c >= 0) {
1391                        val = (val << 4) + c;
1392                        c = hextoint(*s++);
1393                        if (c >= 0) {
1394                            val = (val << 4) + c;
1395                        }
1396                        else
1397                            --s;
1398                    }
1399                    else
1400                        --s;
1401                }
1402                else
1403                    --s;
1404                *p++ = (char) val;
1405                break;
1406            }
1407        }
1408        else
1409            *p++ = (char) c;
1410    }
1411  out:
1412    *p = '\0';
1413    *slen = p - origp;
1414    return s;
1415}
1416
1417
/*
 * Single hex char to int; -1 if not a hex char.
 *
 * All three ranges are tested on the int value itself, so an argument
 * outside the range of a character is never truncated into a digit.
 */
static int hextoint(int c)
{
    if ((c >= '0') && (c <= '9'))
        return c - '0';
    if ((c >= 'a') && (c <= 'f'))
        return c + 10 - 'a';
    if ((c >= 'A') && (c <= 'F'))
        return c + 10 - 'A';
    return -1;
}
1429
1430
1431/*
1432 * return DONE to indicate it's been handled
1433 * return OK to indicate it's a regular file still needing handling
1434 * other returns indicate a failure of some sort
1435 */
1436static int fsmagic(request_rec *r, const char *fn)
1437{
1438    switch (r->finfo.filetype) {
1439    case APR_DIR:
1440        magic_rsl_puts(r, DIR_MAGIC_TYPE);
1441        return DONE;
1442    case APR_CHR:
1443        /*
1444         * (void) magic_rsl_printf(r,"character special (%d/%d)",
1445         * major(sb->st_rdev), minor(sb->st_rdev));
1446         */
1447        (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1448        return DONE;
1449    case APR_BLK:
1450        /*
1451         * (void) magic_rsl_printf(r,"block special (%d/%d)",
1452         * major(sb->st_rdev), minor(sb->st_rdev));
1453         */
1454        (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1455        return DONE;
1456        /* TODO add code to handle V7 MUX and Blit MUX files */
1457    case APR_PIPE:
1458        /*
1459         * magic_rsl_puts(r,"fifo (named pipe)");
1460         */
1461        (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1462        return DONE;
1463    case APR_LNK:
1464        /* We used stat(), the only possible reason for this is that the
1465         * symlink is broken.
1466         */
1467        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01527)
1468                    MODNAME ": broken symlink (%s)", fn);
1469        return HTTP_INTERNAL_SERVER_ERROR;
1470    case APR_SOCK:
1471        magic_rsl_puts(r, MIME_BINARY_UNKNOWN);
1472        return DONE;
1473    case APR_REG:
1474        break;
1475    default:
1476        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01528)
1477                      MODNAME ": invalid file type %d.", r->finfo.filetype);
1478        return HTTP_INTERNAL_SERVER_ERROR;
1479    }
1480
1481    /*
1482     * regular file, check next possibility
1483     */
1484    if (r->finfo.size == 0) {
1485        magic_rsl_puts(r, MIME_TEXT_UNKNOWN);
1486        return DONE;
1487    }
1488    return OK;
1489}
1490
1491/*
1492 * softmagic - lookup one file in database (already read from /etc/magic by
1493 * apprentice.c). Passed the name and FILE * of one file to be typed.
1494 */
1495                /* ARGSUSED1 *//* nbytes passed for regularity, maybe need later */
1496static int softmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes)
1497{
1498    if (match(r, buf, nbytes))
1499        return 1;
1500
1501    return 0;
1502}
1503
1504/*
1505 * Go through the whole list, stopping if you find a match.  Process all the
1506 * continuations of that match before returning.
1507 *
1508 * We support multi-level continuations:
1509 *
1510 * At any time when processing a successful top-level match, there is a current
1511 * continuation level; it represents the level of the last successfully
1512 * matched continuation.
1513 *
1514 * Continuations above that level are skipped as, if we see one, it means that
1515 * the continuation that controls them - i.e, the lower-level continuation
1516 * preceding them - failed to match.
1517 *
1518 * Continuations below that level are processed as, if we see one, it means
1519 * we've finished processing or skipping higher-level continuations under the
1520 * control of a successful or unsuccessful lower-level continuation, and are
1521 * now seeing the next lower-level continuation and should process it.  The
1522 * current continuation level reverts to the level of the one we're seeing.
1523 *
1524 * Continuations at the current level are processed as, if we see one, there's
1525 * no lower-level continuation that may have failed.
1526 *
1527 * If a continuation matches, we bump the current continuation level so that
1528 * higher-level continuations are processed.
1529 */
1530static int match(request_rec *r, unsigned char *s, apr_size_t nbytes)
1531{
1532#if MIME_MAGIC_DEBUG
1533    int rule_counter = 0;
1534#endif
1535    int cont_level = 0;
1536    int need_separator = 0;
1537    union VALUETYPE p;
1538    magic_server_config_rec *conf = (magic_server_config_rec *)
1539                ap_get_module_config(r->server->module_config, &mime_magic_module);
1540    struct magic *m;
1541
1542#if MIME_MAGIC_DEBUG
1543    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01529)
1544                MODNAME ": match conf=%x file=%s m=%s m->next=%s last=%s",
1545                conf,
1546                conf->magicfile ? conf->magicfile : "NULL",
1547                conf->magic ? "set" : "NULL",
1548                (conf->magic && conf->magic->next) ? "set" : "NULL",
1549                conf->last ? "set" : "NULL");
1550#endif
1551
1552#if MIME_MAGIC_DEBUG
1553    for (m = conf->magic; m; m = m->next) {
1554        if (apr_isprint((((unsigned long) m) >> 24) & 255) &&
1555            apr_isprint((((unsigned long) m) >> 16) & 255) &&
1556            apr_isprint((((unsigned long) m) >> 8) & 255) &&
1557            apr_isprint(((unsigned long) m) & 255)) {
1558            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01530)
1559                        MODNAME ": match: POINTER CLOBBERED! "
1560                        "m=\"%c%c%c%c\"",
1561                        (((unsigned long) m) >> 24) & 255,
1562                        (((unsigned long) m) >> 16) & 255,
1563                        (((unsigned long) m) >> 8) & 255,
1564                        ((unsigned long) m) & 255);
1565            break;
1566        }
1567    }
1568#endif
1569
1570    for (m = conf->magic; m; m = m->next) {
1571#if MIME_MAGIC_DEBUG
1572        rule_counter++;
1573        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01531)
1574                    MODNAME ": line=%d desc=%s", m->lineno, m->desc);
1575#endif
1576
1577        /* check if main entry matches */
1578        if (!mget(r, &p, s, m, nbytes) ||
1579            !mcheck(r, &p, m)) {
1580            struct magic *m_cont;
1581
1582            /*
1583             * main entry didn't match, flush its continuations
1584             */
1585            if (!m->next || (m->next->cont_level == 0)) {
1586                continue;
1587            }
1588
1589            m_cont = m->next;
1590            while (m_cont && (m_cont->cont_level != 0)) {
1591#if MIME_MAGIC_DEBUG
1592                rule_counter++;
1593                ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01532)
1594                        MODNAME ": line=%d mc=%x mc->next=%x cont=%d desc=%s",
1595                            m_cont->lineno, m_cont,
1596                            m_cont->next, m_cont->cont_level,
1597                            m_cont->desc);
1598#endif
1599                /*
1600                 * this trick allows us to keep *m in sync when the continue
1601                 * advances the pointer
1602                 */
1603                m = m_cont;
1604                m_cont = m_cont->next;
1605            }
1606            continue;
1607        }
1608
1609        /* if we get here, the main entry rule was a match */
1610        /* this will be the last run through the loop */
1611#if MIME_MAGIC_DEBUG
1612        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01533)
1613                    MODNAME ": rule matched, line=%d type=%d %s",
1614                    m->lineno, m->type,
1615                    (m->type == STRING) ? m->value.s : "");
1616#endif
1617
1618        /* print the match */
1619        mprint(r, &p, m);
1620
1621        /*
1622         * If we printed something, we'll need to print a blank before we
1623         * print something else.
1624         */
1625        if (m->desc[0])
1626            need_separator = 1;
1627        /* and any continuations that match */
1628        cont_level++;
1629        /*
1630         * while (m && m->next && m->next->cont_level != 0 && ( m = m->next
1631         * ))
1632         */
1633        m = m->next;
1634        while (m && (m->cont_level != 0)) {
1635#if MIME_MAGIC_DEBUG
1636            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01534)
1637                        MODNAME ": match line=%d cont=%d type=%d %s",
1638                        m->lineno, m->cont_level, m->type,
1639                        (m->type == STRING) ? m->value.s : "");
1640#endif
1641            if (cont_level >= m->cont_level) {
1642                if (cont_level > m->cont_level) {
1643                    /*
1644                     * We're at the end of the level "cont_level"
1645                     * continuations.
1646                     */
1647                    cont_level = m->cont_level;
1648                }
1649                if (mget(r, &p, s, m, nbytes) &&
1650                    mcheck(r, &p, m)) {
1651                    /*
1652                     * This continuation matched. Print its message, with a
1653                     * blank before it if the previous item printed and this
1654                     * item isn't empty.
1655                     */
1656                    /* space if previous printed */
1657                    if (need_separator
1658                        && (m->nospflag == 0)
1659                        && (m->desc[0] != '\0')
1660                        ) {
1661                        (void) magic_rsl_putchar(r, ' ');
1662                        need_separator = 0;
1663                    }
1664                    mprint(r, &p, m);
1665                    if (m->desc[0])
1666                        need_separator = 1;
1667
1668                    /*
1669                     * If we see any continuations at a higher level, process
1670                     * them.
1671                     */
1672                    cont_level++;
1673                }
1674            }
1675
1676            /* move to next continuation record */
1677            m = m->next;
1678        }
1679#if MIME_MAGIC_DEBUG
1680        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01535)
1681                    MODNAME ": matched after %d rules", rule_counter);
1682#endif
1683        return 1;  /* all through */
1684    }
1685#if MIME_MAGIC_DEBUG
1686    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01536)
1687                MODNAME ": failed after %d rules", rule_counter);
1688#endif
1689    return 0;  /* no match at all */
1690}
1691
1692static void mprint(request_rec *r, union VALUETYPE *p, struct magic *m)
1693{
1694    char *pp;
1695    unsigned long v;
1696    char time_str[APR_CTIME_LEN];
1697
1698    switch (m->type) {
1699    case BYTE:
1700        v = p->b;
1701        break;
1702
1703    case SHORT:
1704    case BESHORT:
1705    case LESHORT:
1706        v = p->h;
1707        break;
1708
1709    case LONG:
1710    case BELONG:
1711    case LELONG:
1712        v = p->l;
1713        break;
1714
1715    case STRING:
1716        if (m->reln == '=') {
1717            (void) magic_rsl_printf(r, m->desc, m->value.s);
1718        }
1719        else {
1720            (void) magic_rsl_printf(r, m->desc, p->s);
1721        }
1722        return;
1723
1724    case DATE:
1725    case BEDATE:
1726    case LEDATE:
1727        apr_ctime(time_str, apr_time_from_sec(*(time_t *)&p->l));
1728        pp = time_str;
1729        (void) magic_rsl_printf(r, m->desc, pp);
1730        return;
1731    default:
1732        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01537)
1733                    MODNAME ": invalid m->type (%d) in mprint().",
1734                    m->type);
1735        return;
1736    }
1737
1738    v = signextend(r->server, m, v) & m->mask;
1739    (void) magic_rsl_printf(r, m->desc, (unsigned long) v);
1740}
1741
1742/*
1743 * Convert the byte order of the data we are looking at
1744 */
1745static int mconvert(request_rec *r, union VALUETYPE *p, struct magic *m)
1746{
1747    char *rt;
1748
1749    switch (m->type) {
1750    case BYTE:
1751    case SHORT:
1752    case LONG:
1753    case DATE:
1754        return 1;
1755    case STRING:
1756        /* Null terminate and eat the return */
1757        p->s[sizeof(p->s) - 1] = '\0';
1758        if ((rt = strchr(p->s, '\n')) != NULL)
1759            *rt = '\0';
1760        return 1;
1761    case BESHORT:
1762        p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
1763        return 1;
1764    case BELONG:
1765    case BEDATE:
1766        p->l = (long)
1767            ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
1768        return 1;
1769    case LESHORT:
1770        p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
1771        return 1;
1772    case LELONG:
1773    case LEDATE:
1774        p->l = (long)
1775            ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
1776        return 1;
1777    default:
1778        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01538)
1779                    MODNAME ": invalid type %d in mconvert().", m->type);
1780        return 0;
1781    }
1782}
1783
1784
1785static int mget(request_rec *r, union VALUETYPE *p, unsigned char *s,
1786                struct magic *m, apr_size_t nbytes)
1787{
1788    long offset = m->offset;
1789
1790    if (offset + sizeof(union VALUETYPE) > nbytes)
1791                  return 0;
1792
1793    memcpy(p, s + offset, sizeof(union VALUETYPE));
1794
1795    if (!mconvert(r, p, m))
1796        return 0;
1797
1798    if (m->flag & INDIR) {
1799
1800        switch (m->in.type) {
1801        case BYTE:
1802            offset = p->b + m->in.offset;
1803            break;
1804        case SHORT:
1805            offset = p->h + m->in.offset;
1806            break;
1807        case LONG:
1808            offset = p->l + m->in.offset;
1809            break;
1810        }
1811
1812        if (offset + sizeof(union VALUETYPE) > nbytes)
1813                      return 0;
1814
1815        memcpy(p, s + offset, sizeof(union VALUETYPE));
1816
1817        if (!mconvert(r, p, m))
1818            return 0;
1819    }
1820    return 1;
1821}
1822
1823static int mcheck(request_rec *r, union VALUETYPE *p, struct magic *m)
1824{
1825    register unsigned long l = m->value.l;
1826    register unsigned long v;
1827    int matched;
1828
1829    if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
1830        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01539)
1831                    MODNAME ": BOINK");
1832        return 1;
1833    }
1834
1835    switch (m->type) {
1836    case BYTE:
1837        v = p->b;
1838        break;
1839
1840    case SHORT:
1841    case BESHORT:
1842    case LESHORT:
1843        v = p->h;
1844        break;
1845
1846    case LONG:
1847    case BELONG:
1848    case LELONG:
1849    case DATE:
1850    case BEDATE:
1851    case LEDATE:
1852        v = p->l;
1853        break;
1854
1855    case STRING:
1856        l = 0;
1857        /*
1858         * What we want here is: v = strncmp(m->value.s, p->s, m->vallen);
1859         * but ignoring any nulls.  bcmp doesn't give -/+/0 and isn't
1860         * universally available anyway.
1861         */
1862        v = 0;
1863        {
1864            register unsigned char *a = (unsigned char *) m->value.s;
1865            register unsigned char *b = (unsigned char *) p->s;
1866            register int len = m->vallen;
1867
1868            while (--len >= 0)
1869                if ((v = *b++ - *a++) != 0)
1870                    break;
1871        }
1872        break;
1873    default:
1874        /*  bogosity, pretend that it just wasn't a match */
1875        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01540)
1876                    MODNAME ": invalid type %d in mcheck().", m->type);
1877        return 0;
1878    }
1879
1880    v = signextend(r->server, m, v) & m->mask;
1881
1882    switch (m->reln) {
1883    case 'x':
1884#if MIME_MAGIC_DEBUG
1885        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01541)
1886                    "%lu == *any* = 1", v);
1887#endif
1888        matched = 1;
1889        break;
1890
1891    case '!':
1892        matched = v != l;
1893#if MIME_MAGIC_DEBUG
1894        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01542)
1895                    "%lu != %lu = %d", v, l, matched);
1896#endif
1897        break;
1898
1899    case '=':
1900        matched = v == l;
1901#if MIME_MAGIC_DEBUG
1902        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01543)
1903                    "%lu == %lu = %d", v, l, matched);
1904#endif
1905        break;
1906
1907    case '>':
1908        if (m->flag & UNSIGNED) {
1909            matched = v > l;
1910#if MIME_MAGIC_DEBUG
1911            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01544)
1912                        "%lu > %lu = %d", v, l, matched);
1913#endif
1914        }
1915        else {
1916            matched = (long) v > (long) l;
1917#if MIME_MAGIC_DEBUG
1918            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01545)
1919                        "%ld > %ld = %d", v, l, matched);
1920#endif
1921        }
1922        break;
1923
1924    case '<':
1925        if (m->flag & UNSIGNED) {
1926            matched = v < l;
1927#if MIME_MAGIC_DEBUG
1928            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01546)
1929                        "%lu < %lu = %d", v, l, matched);
1930#endif
1931        }
1932        else {
1933            matched = (long) v < (long) l;
1934#if MIME_MAGIC_DEBUG
1935            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01547)
1936                        "%ld < %ld = %d", v, l, matched);
1937#endif
1938        }
1939        break;
1940
1941    case '&':
1942        matched = (v & l) == l;
1943#if MIME_MAGIC_DEBUG
1944        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01548)
1945                    "((%lx & %lx) == %lx) = %d", v, l, l, matched);
1946#endif
1947        break;
1948
1949    case '^':
1950        matched = (v & l) != l;
1951#if MIME_MAGIC_DEBUG
1952        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01549)
1953                    "((%lx & %lx) != %lx) = %d", v, l, l, matched);
1954#endif
1955        break;
1956
1957    default:
1958        /* bogosity, pretend it didn't match */
1959        matched = 0;
1960        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01550)
1961                    MODNAME ": mcheck: can't happen: invalid relation %d.",
1962                    m->reln);
1963        break;
1964    }
1965
1966    return matched;
1967}
1968
/*
 * String equality test.  The first characters are compared before calling
 * strcmp(), so most mismatches are rejected without the function call.
 * Note that both arguments are evaluated more than once.
 */
#define STREQ(a, b) ((a)[0] == (b)[0] && 0 == strcmp((a), (b)))
1971
1972static int ascmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes)
1973{
1974    int has_escapes = 0;
1975    unsigned char *s;
1976    char nbuf[SMALL_HOWMANY + 1];  /* one extra for terminating '\0' */
1977    char *token;
1978    const struct names *p;
1979    int small_nbytes;
1980    char *strtok_state;
1981
1982    /* these are easy, do them first */
1983
1984    /*
1985     * for troff, look for . + letter + letter or .\"; this must be done to
1986     * disambiguate tar archives' ./file and other trash from real troff
1987     * input.
1988     */
1989    if (*buf == '.') {
1990        unsigned char *tp = buf + 1;
1991
1992        while (apr_isspace(*tp))
1993            ++tp;  /* skip leading whitespace */
1994        if ((apr_isalnum(*tp) || *tp == '\\') &&
1995             (apr_isalnum(*(tp + 1)) || *tp == '"')) {
1996            magic_rsl_puts(r, "application/x-troff");
1997            return 1;
1998        }
1999    }
2000    if ((*buf == 'c' || *buf == 'C') && apr_isspace(*(buf + 1))) {
2001        /* Fortran */
2002        magic_rsl_puts(r, "text/plain");
2003        return 1;
2004    }
2005
2006    /* look for tokens from names.h - this is expensive!, so we'll limit
2007     * ourselves to only SMALL_HOWMANY bytes */
2008    small_nbytes = (nbytes > SMALL_HOWMANY) ? SMALL_HOWMANY : nbytes;
2009    /* make a copy of the buffer here because apr_strtok() will destroy it */
2010    s = (unsigned char *) memcpy(nbuf, buf, small_nbytes);
2011    s[small_nbytes] = '\0';
2012    has_escapes = (memchr(s, '\033', small_nbytes) != NULL);
2013    while ((token = apr_strtok((char *) s, " \t\n\r\f", &strtok_state)) != NULL) {
2014        s = NULL;  /* make apr_strtok() keep on tokin' */
2015        for (p = names; p < names + NNAMES; p++) {
2016            if (STREQ(p->name, token)) {
2017                magic_rsl_puts(r, types[p->type]);
2018                if (has_escapes)
2019                    magic_rsl_puts(r, " (with escape sequences)");
2020                return 1;
2021            }
2022        }
2023    }
2024
2025    switch (is_tar(buf, nbytes)) {
2026    case 1:
2027        /* V7 tar archive */
2028        magic_rsl_puts(r, "application/x-tar");
2029        return 1;
2030    case 2:
2031        /* POSIX tar archive */
2032        magic_rsl_puts(r, "application/x-tar");
2033        return 1;
2034    }
2035
2036    /* all else fails, but it is ascii... */
2037    return 0;
2038}
2039
2040
/*
 * compress routines:
 *   zmagic() - returns 0 if not recognized; uncompresses and prints
 *              information if recognized
 *   uncompress(r, method, newch, n) - uncompress the requested file into
 *              *newch using method, reading at most n bytes; returns the
 *              size of the new data
 */
2046
2047static struct {
2048    char *magic;
2049    apr_size_t maglen;
2050    char *argv[3];
2051    int silent;
2052    char *encoding;  /* MUST be lowercase */
2053} compr[] = {
2054
2055    /* we use gzip here rather than uncompress because we have to pass
2056     * it a full filename -- and uncompress only considers filenames
2057     * ending with .Z
2058     */
2059    {
2060        "\037\235", 2, {
2061            "gzip", "-dcq", NULL
2062        }, 0, "x-compress"
2063    },
2064    {
2065        "\037\213", 2, {
2066            "gzip", "-dcq", NULL
2067        }, 1, "x-gzip"
2068    },
2069    /*
2070     * XXX pcat does not work, cause I don't know how to make it read stdin,
2071     * so we use gzip
2072     */
2073    {
2074        "\037\036", 2, {
2075            "gzip", "-dcq", NULL
2076        }, 0, "x-gzip"
2077    },
2078};
2079
2080static int ncompr = sizeof(compr) / sizeof(compr[0]);
2081
2082static int zmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes)
2083{
2084    unsigned char *newbuf;
2085    int newsize;
2086    int i;
2087
2088    for (i = 0; i < ncompr; i++) {
2089        if (nbytes < compr[i].maglen)
2090            continue;
2091        if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0)
2092            break;
2093    }
2094
2095    if (i == ncompr)
2096        return 0;
2097
2098    if ((newsize = uncompress(r, i, &newbuf, HOWMANY)) > 0) {
2099        /* set encoding type in the request record */
2100        r->content_encoding = compr[i].encoding;
2101
2102        newbuf[newsize-1] = '\0';  /* null-terminate uncompressed data */
2103        /* Try to detect the content type of the uncompressed data */
2104        if (tryit(r, newbuf, newsize, 0) != OK) {
2105            return 0;
2106        }
2107    }
2108    return 1;
2109}
2110
2111
/* Arguments handed from uncompress() to create_uncompress_child(). */
struct uncompress_parms {
    request_rec *r;  /* request whose r->filename is to be decompressed */
    int method;  /* index into compr[] */
};
2116
2117static int create_uncompress_child(struct uncompress_parms *parm, apr_pool_t *cntxt,
2118                                   apr_file_t **pipe_in)
2119{
2120    int rc = 1;
2121    const char *new_argv[4];
2122    request_rec *r = parm->r;
2123    apr_pool_t *child_context = cntxt;
2124    apr_procattr_t *procattr;
2125    apr_proc_t *procnew;
2126
2127    /* XXX missing 1.3 logic:
2128     *
2129     * what happens when !compr[parm->method].silent?
2130     * Should we create the err pipe, read it, and copy to the log?
2131     */
2132
2133    if ((apr_procattr_create(&procattr, child_context) != APR_SUCCESS) ||
2134        (apr_procattr_io_set(procattr, APR_FULL_BLOCK,
2135                           APR_FULL_BLOCK, APR_NO_PIPE)   != APR_SUCCESS) ||
2136        (apr_procattr_dir_set(procattr,
2137                              ap_make_dirstr_parent(r->pool, r->filename)) != APR_SUCCESS) ||
2138        (apr_procattr_cmdtype_set(procattr, APR_PROGRAM_PATH) != APR_SUCCESS)) {
2139        /* Something bad happened, tell the world. */
2140        ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOPROC, r, APLOGNO(01551)
2141               "couldn't setup child process: %s", r->filename);
2142    }
2143    else {
2144        new_argv[0] = compr[parm->method].argv[0];
2145        new_argv[1] = compr[parm->method].argv[1];
2146        new_argv[2] = r->filename;
2147        new_argv[3] = NULL;
2148
2149        procnew = apr_pcalloc(child_context, sizeof(*procnew));
2150        rc = apr_proc_create(procnew, compr[parm->method].argv[0],
2151                               new_argv, NULL, procattr, child_context);
2152
2153        if (rc != APR_SUCCESS) {
2154            /* Bad things happened. Everyone should have cleaned up. */
2155            ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOPROC, r, APLOGNO(01552)
2156                          MODNAME ": could not execute `%s'.",
2157                          compr[parm->method].argv[0]);
2158        }
2159        else {
2160            apr_pool_note_subprocess(child_context, procnew, APR_KILL_AFTER_TIMEOUT);
2161            *pipe_in = procnew->out;
2162        }
2163    }
2164
2165    return (rc);
2166}
2167
2168static int uncompress(request_rec *r, int method,
2169                      unsigned char **newch, apr_size_t n)
2170{
2171    struct uncompress_parms parm;
2172    apr_file_t *pipe_out = NULL;
2173    apr_pool_t *sub_context;
2174    apr_status_t rv;
2175
2176    parm.r = r;
2177    parm.method = method;
2178
2179    /* We make a sub_pool so that we can collect our child early, otherwise
2180     * there are cases (i.e. generating directory indicies with mod_autoindex)
2181     * where we would end up with LOTS of zombies.
2182     */
2183    if (apr_pool_create(&sub_context, r->pool) != APR_SUCCESS)
2184        return -1;
2185
2186    if ((rv = create_uncompress_child(&parm, sub_context, &pipe_out)) != APR_SUCCESS) {
2187        ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01553)
2188                    MODNAME ": couldn't spawn uncompress process: %s", r->uri);
2189        return -1;
2190    }
2191
2192    *newch = (unsigned char *) apr_palloc(r->pool, n);
2193    rv = apr_file_read(pipe_out, *newch, &n);
2194    if (n == 0) {
2195        apr_pool_destroy(sub_context);
2196        ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01554)
2197            MODNAME ": read failed from uncompress of %s", r->filename);
2198        return -1;
2199    }
2200    apr_pool_destroy(sub_context);
2201    return n;
2202}
2203
2204/*
2205 * is_tar() -- figure out whether file is a tar archive.
2206 *
2207 * Stolen (by author of file utility) from the public domain tar program: Public
2208 * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
2209 *
2210 * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7
2211 * 1997/06/24 00:41:02 ikluft Exp ikluft $
2212 *
2213 * Comments changed and some code/comments reformatted for file command by Ian
2214 * Darwin.
2215 */
2216
/* non-zero when c, taken as an unsigned char, is one of '0'..'7' */
#define isodigit(c) (('0' <= (unsigned char)(c)) && ((unsigned char)(c) <= '7'))
2218
2219/*
2220 * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for
2221 * old UNIX tar file, 2 for Unix Std (POSIX) tar file.
2222 */
2223
2224static int is_tar(unsigned char *buf, apr_size_t nbytes)
2225{
2226    register union record *header = (union record *) buf;
2227    register int i;
2228    register long sum, recsum;
2229    register char *p;
2230
2231    if (nbytes < sizeof(union record))
2232               return 0;
2233
2234    recsum = from_oct(8, header->header.chksum);
2235
2236    sum = 0;
2237    p = header->charptr;
2238    for (i = sizeof(union record); --i >= 0;) {
2239        /*
2240         * We can't use unsigned char here because of old compilers, e.g. V7.
2241         */
2242        sum += 0xFF & *p++;
2243    }
2244
2245    /* Adjust checksum to count the "chksum" field as blanks. */
2246    for (i = sizeof(header->header.chksum); --i >= 0;)
2247        sum -= 0xFF & header->header.chksum[i];
2248    sum += ' ' * sizeof header->header.chksum;
2249
2250    if (sum != recsum)
2251        return 0;   /* Not a tar archive */
2252
2253    if (0 == strcmp(header->header.magic, TMAGIC))
2254        return 2;   /* Unix Standard tar archive */
2255
2256    return 1;       /* Old fashioned tar archive */
2257}
2258
2259
/*
 * Quick and dirty octal conversion.
 *
 *   digs   width of the field in characters
 *   where  start of the field
 *
 * Leading white space is skipped and counts against the width.  Octal
 * digits are then accumulated until the width is used up or a non-octal
 * character is met.
 *
 * Result is -1 if the field is invalid (all blank, or nonoctal).
 */
static long from_oct(int digs, char *where)
{
    long value = 0;

    /* Skip spaces; a field made of nothing else is invalid */
    for (; apr_isspace(*where); where++) {
        if (--digs <= 0) {
            return -1;  /* All blank field */
        }
    }

    /* Scan til nonoctal, or until the field width runs out */
    for (; digs > 0 && isodigit(*where); where++, digs--) {
        value = (value << 3) | (*where - '0');
    }

    /* inside the field, only a NUL or white space may follow the digits */
    if (digs > 0 && *where != '\0' && !apr_isspace(*where)) {
        return -1;  /* Ended on non-space/nul */
    }

    return value;
}
2285
2286/*
2287 * Check for file-revision suffix
2288 *
2289 * This is for an obscure document control system used on an intranet.
2290 * The web representation of each file's revision has an @1, @2, etc
2291 * appended with the revision number.  This needs to be stripped off to
2292 * find the file suffix, which can be recognized by sending the name back
2293 * through a sub-request.  The base file name (without the @num suffix)
2294 * must exist because its type will be used as the result.
2295 */
2296static int revision_suffix(request_rec *r)
2297{
2298    int suffix_pos, result;
2299    char *sub_filename;
2300    request_rec *sub;
2301
2302#if MIME_MAGIC_DEBUG
2303    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01555)
2304                MODNAME ": revision_suffix checking %s", r->filename);
2305#endif /* MIME_MAGIC_DEBUG */
2306
2307    /* check for recognized revision suffix */
2308    suffix_pos = strlen(r->filename) - 1;
2309    if (!apr_isdigit(r->filename[suffix_pos])) {
2310        return 0;
2311    }
2312    while (suffix_pos >= 0 && apr_isdigit(r->filename[suffix_pos]))
2313        suffix_pos--;
2314    if (suffix_pos < 0 || r->filename[suffix_pos] != '@') {
2315        return 0;
2316    }
2317
2318    /* perform sub-request for the file name without the suffix */
2319    result = 0;
2320    sub_filename = apr_pstrndup(r->pool, r->filename, suffix_pos);
2321#if MIME_MAGIC_DEBUG
2322    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01556)
2323                MODNAME ": subrequest lookup for %s", sub_filename);
2324#endif /* MIME_MAGIC_DEBUG */
2325    sub = ap_sub_req_lookup_file(sub_filename, r, NULL);
2326
2327    /* extract content type/encoding/language from sub-request */
2328    if (sub->content_type) {
2329        ap_set_content_type(r, apr_pstrdup(r->pool, sub->content_type));
2330#if MIME_MAGIC_DEBUG
2331        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01557)
2332                    MODNAME ": subrequest %s got %s",
2333                    sub_filename, r->content_type);
2334#endif /* MIME_MAGIC_DEBUG */
2335        if (sub->content_encoding)
2336            r->content_encoding =
2337                apr_pstrdup(r->pool, sub->content_encoding);
2338        if (sub->content_languages) {
2339            int n;
2340            r->content_languages = apr_array_copy(r->pool,
2341                                                  sub->content_languages);
2342            for (n = 0; n < r->content_languages->nelts; ++n) {
2343                char **lang = ((char **)r->content_languages->elts) + n;
2344                *lang = apr_pstrdup(r->pool, *lang);
2345            }
2346        }
2347        result = 1;
2348    }
2349
2350    /* clean up */
2351    ap_destroy_sub_req(sub);
2352
2353    return result;
2354}
2355
2356/*
2357 * initialize the module
2358 */
2359static int magic_init(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *main_server)
2360{
2361    int result;
2362    magic_server_config_rec *conf;
2363    magic_server_config_rec *main_conf;
2364    server_rec *s;
2365#if MIME_MAGIC_DEBUG
2366    struct magic *m, *prevm;
2367#endif /* MIME_MAGIC_DEBUG */
2368
2369    main_conf = ap_get_module_config(main_server->module_config, &mime_magic_module);
2370    for (s = main_server; s; s = s->next) {
2371        conf = ap_get_module_config(s->module_config, &mime_magic_module);
2372        if (conf->magicfile == NULL && s != main_server) {
2373            /* inherits from the parent */
2374            *conf = *main_conf;
2375        }
2376        else if (conf->magicfile) {
2377            result = apprentice(s, p);
2378            if (result == -1)
2379                return OK;
2380#if MIME_MAGIC_DEBUG
2381            prevm = 0;
2382            ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01558)
2383                        MODNAME ": magic_init 1 test");
2384            for (m = conf->magic; m; m = m->next) {
2385                if (apr_isprint((((unsigned long) m) >> 24) & 255) &&
2386                    apr_isprint((((unsigned long) m) >> 16) & 255) &&
2387                    apr_isprint((((unsigned long) m) >> 8) & 255) &&
2388                    apr_isprint(((unsigned long) m) & 255)) {
2389                    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01559)
2390                                MODNAME ": magic_init 1: POINTER CLOBBERED! "
2391                                "m=\"%c%c%c%c\" line=%d",
2392                                (((unsigned long) m) >> 24) & 255,
2393                                (((unsigned long) m) >> 16) & 255,
2394                                (((unsigned long) m) >> 8) & 255,
2395                                ((unsigned long) m) & 255,
2396                                prevm ? prevm->lineno : -1);
2397                    break;
2398                }
2399                prevm = m;
2400            }
2401#endif
2402        }
2403    }
2404    return OK;
2405}
2406
2407/*
2408 * Find the Content-Type from any resource this module has available
2409 */
2410
2411static int magic_find_ct(request_rec *r)
2412{
2413    int result;
2414    magic_server_config_rec *conf;
2415
2416    /* the file has to exist */
2417    if (r->finfo.filetype == APR_NOFILE || !r->filename) {
2418        return DECLINED;
2419    }
2420
2421    /* was someone else already here? */
2422    if (r->content_type) {
2423        return DECLINED;
2424    }
2425
2426    conf = ap_get_module_config(r->server->module_config, &mime_magic_module);
2427    if (!conf || !conf->magic) {
2428        return DECLINED;
2429    }
2430
2431    /* initialize per-request info */
2432    if (!magic_set_config(r)) {
2433        return HTTP_INTERNAL_SERVER_ERROR;
2434    }
2435
2436    /* try excluding file-revision suffixes */
2437    if (revision_suffix(r) != 1) {
2438        /* process it based on the file contents */
2439        if ((result = magic_process(r)) != OK) {
2440            return result;
2441        }
2442    }
2443
2444    /* if we have any results, put them in the request structure */
2445    return magic_rsl_to_request(r);
2446}
2447
2448static void register_hooks(apr_pool_t *p)
2449{
2450    static const char * const aszPre[]={ "mod_mime.c", NULL };
2451
2452    /* mod_mime_magic should be run after mod_mime, if at all. */
2453
2454    ap_hook_type_checker(magic_find_ct, aszPre, NULL, APR_HOOK_MIDDLE);
2455    ap_hook_post_config(magic_init, NULL, NULL, APR_HOOK_FIRST);
2456}
2457
/*
 * Apache API module interface
 *
 * Module record for mime_magic.  It has no per-directory configuration;
 * its state is per-server only.
 */

AP_DECLARE_MODULE(mime_magic) =
{
    STANDARD20_MODULE_STUFF,
    NULL,                      /* dir config creator: none */
    NULL,                      /* dir merger --- default is to override */
    create_magic_server_config,        /* server config creator */
    merge_magic_server_config, /* server config merger */
    mime_magic_cmds,           /* command table */
    register_hooks              /* register hooks */
};
2472