1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * mod_mime_magic: MIME type lookup via file magic numbers 19 * Copyright (c) 1996-1997 Cisco Systems, Inc. 20 * 21 * This software was submitted by Cisco Systems to the Apache Software Foundation in July 22 * 1997. Future revisions and derivatives of this source code must 23 * acknowledge Cisco Systems as the original contributor of this module. 24 * All other licensing and usage conditions are those of the Apache Software Foundation. 25 * 26 * Some of this code is derived from the free version of the file command 27 * originally posted to comp.sources.unix. Copyright info for that program 28 * is included below as required. 29 * --------------------------------------------------------------------------- 30 * - Copyright (c) Ian F. Darwin, 1987. Written by Ian F. Darwin. 31 * 32 * This software is not subject to any license of the American Telephone and 33 * Telegraph Company or of the Regents of the University of California. 34 * 35 * Permission is granted to anyone to use this software for any purpose on any 36 * computer system, and to alter it and redistribute it freely, subject to 37 * the following restrictions: 38 * 39 * 1. 
The author is not responsible for the consequences of use of this 40 * software, no matter how awful, even if they arise from flaws in it. 41 * 42 * 2. The origin of this software must not be misrepresented, either by 43 * explicit claim or by omission. Since few users ever read sources, credits 44 * must appear in the documentation. 45 * 46 * 3. Altered versions must be plainly marked as such, and must not be 47 * misrepresented as being the original software. Since few users ever read 48 * sources, credits must appear in the documentation. 49 * 50 * 4. This notice may not be removed or altered. 51 * ------------------------------------------------------------------------- 52 * 53 * For compliance with Mr Darwin's terms: this has been very significantly 54 * modified from the free "file" command. 55 * - all-in-one file for compilation convenience when moving from one 56 * version of Apache to the next. 57 * - Memory allocation is done through the Apache API's apr_pool_t structure. 58 * - All functions have had necessary Apache API request or server 59 * structures passed to them where necessary to call other Apache API 60 * routines. (i.e. usually for logging, files, or memory allocation in 61 * itself or a called function.) 62 * - struct magic has been converted from an array to a single-ended linked 63 * list because it only grows one record at a time, it's only accessed 64 * sequentially, and the Apache API has no equivalent of realloc(). 65 * - Functions have been changed to get their parameters from the server 66 * configuration instead of globals. (It should be reentrant now but has 67 * not been tested in a threaded environment.) 68 * - Places where it used to print results to stdout now saves them in a 69 * list where they're used to set the MIME type in the Apache request 70 * record. 71 * - Command-line flags have been removed since they will never be used here. 
*
 * Ian Kluft <ikluft@cisco.com>
 * Engineering Information Framework
 * Central Engineering
 * Cisco Systems, Inc.
 * San Jose, CA, USA
 *
 * Initial installation July/August 1996
 * Misc bug fixes May 1997
 * Submission to Apache Software Foundation July 1997
 *
 */

#include "apr.h"
#include "apr_strings.h"
#include "apr_lib.h"
#define APR_WANT_STRFUNC
#include "apr_want.h"

#if APR_HAVE_UNISTD_H
#include <unistd.h>
#endif

#include "ap_config.h"
#include "httpd.h"
#include "http_config.h"
#include "http_request.h"
#include "http_core.h"
#include "http_log.h"
#include "http_protocol.h"
#include "util_script.h"

/* ### this isn't set by configure?  does anybody set this? */
#ifdef HAVE_UTIME_H
#include <utime.h>
#endif

/*
 * data structures and related constants
 */

#define MODNAME        "mod_mime_magic"
#define MIME_MAGIC_DEBUG        0

#define MIME_BINARY_UNKNOWN    "application/octet-stream"
#define MIME_TEXT_UNKNOWN    "text/plain"

#define MAXMIMESTRING        256

/* HOWMANY must be at least 4096 to make gzip -dcq work */
#define HOWMANY  4096
/* SMALL_HOWMANY limits how much work we do to figure out text files */
#define SMALL_HOWMANY 1024
#define MAXDESC    50   /* max length of text description */
#define MAXstring 64    /* max length of "string" types */

/*
 * One rule parsed from a line of the magic file.  Rules are kept in a
 * singly linked list in the order they were read.  The #define lines inside
 * the struct name the legal values of the field they follow.
 */
struct magic {
    struct magic *next;     /* link to next entry */
    int lineno;             /* line number from magic file */

    short flag;
#define INDIR    1          /* if '>(...)' appears,  */
#define UNSIGNED 2          /* comparison is unsigned */
    short cont_level;       /* level of ">" */
    struct {
        char type;          /* byte short long */
        long offset;        /* offset from indirection */
    } in;
    long offset;            /* offset to magic number */
    unsigned char reln;     /* relation (0=eq, '>'=gt, etc) */
    char type;              /* int, short, long or string. */
    char vallen;            /* length of string value, if any */
#define BYTE    1
#define SHORT    2
#define LONG    4
#define STRING    5
#define DATE    6
#define BESHORT    7
#define BELONG    8
#define BEDATE    9
#define LESHORT    10
#define LELONG    11
#define LEDATE    12
    union VALUETYPE {
        unsigned char b;
        unsigned short h;
        unsigned long l;
        char s[MAXstring];
        unsigned char hs[2];   /* 2 bytes of a fixed-endian "short" */
        unsigned char hl[4];   /* 4 bytes of a fixed-endian "long" */
    } value;                /* either number or string */
    unsigned long mask;     /* mask before comparison with value */
    char nospflag;          /* suppress space character */

    /* NOTE: this string is suspected of overrunning - find it! */
    char desc[MAXDESC];     /* description */
};

/*
 * data structures for tar file recognition
 * --------------------------------------------------------------------------
 * Header file for public domain tar (tape archive) program.
 *
 * @(#)tar.h 1.20 86/10/29    Public Domain. Created 25 August 1985 by John
 * Gilmore, ihnp4!hoptoad!gnu.
 *
 * Header block on tape.
 *
 * I'm going to use traditional DP naming conventions here. A "block" is a big
 * chunk of stuff that we do I/O on. A "record" is a piece of info that we
 * care about. Typically many "record"s fit into a "block".
 */
#define RECORDSIZE    512
#define NAMSIZ    100
#define TUNMLEN    32
#define TGNMLEN    32

/* One tar record, viewable either as raw bytes or as the header fields. */
union record {
    char charptr[RECORDSIZE];
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;
};

/* The magic field is filled with this if uname and gname are valid.
*/ 210#define TMAGIC "ustar " /* 7 chars and a null */ 211 212/* 213 * file-function prototypes 214 */ 215static int ascmagic(request_rec *, unsigned char *, apr_size_t); 216static int is_tar(unsigned char *, apr_size_t); 217static int softmagic(request_rec *, unsigned char *, apr_size_t); 218static int tryit(request_rec *, unsigned char *, apr_size_t, int); 219static int zmagic(request_rec *, unsigned char *, apr_size_t); 220 221static int getvalue(server_rec *, struct magic *, char **); 222static int hextoint(int); 223static char *getstr(server_rec *, char *, char *, int, int *); 224static int parse(server_rec *, apr_pool_t *p, char *, int); 225 226static int match(request_rec *, unsigned char *, apr_size_t); 227static int mget(request_rec *, union VALUETYPE *, unsigned char *, 228 struct magic *, apr_size_t); 229static int mcheck(request_rec *, union VALUETYPE *, struct magic *); 230static void mprint(request_rec *, union VALUETYPE *, struct magic *); 231 232static int uncompress(request_rec *, int, 233 unsigned char **, apr_size_t); 234static long from_oct(int, char *); 235static int fsmagic(request_rec *r, const char *fn); 236 237/* 238 * includes for ASCII substring recognition formerly "names.h" in file 239 * command 240 * 241 * Original notes: names and types used by ascmagic in file(1). These tokens are 242 * here because they can appear anywhere in the first HOWMANY bytes, while 243 * tokens in /etc/magic must appear at fixed offsets into the file. Don't 244 * make HOWMANY too high unless you have a very fast CPU. 245 */ 246 247/* these types are used to index the apr_table_t 'types': keep em in sync! 
*/
/* HTML inserted in first because this is a web server module now */
#define L_HTML    0   /* HTML */
#define L_C       1   /* first and foremost on UNIX */
#define L_FORT    2   /* the oldest one */
#define L_MAKE    3   /* Makefiles */
#define L_PLI     4   /* PL/1 */
#define L_MACH    5   /* some kinda assembler */
#define L_ENG     6   /* English */
#define L_PAS     7   /* Pascal */
#define L_MAIL    8   /* Electronic mail */
#define L_NEWS    9   /* Usenet Netnews */

/*
 * MIME type for each L_* index above, in the same order.  The entry after
 * L_NEWS is a catch-all and the list ends with a null pointer.
 */
static char *types[] =
{
    "text/html",             /* HTML */
    "text/plain",            /* "c program text", */
    "text/plain",            /* "fortran program text", */
    "text/plain",            /* "make commands text", */
    "text/plain",            /* "pl/1 program text", */
    "text/plain",            /* "assembler program text", */
    "text/plain",            /* "English text", */
    "text/plain",            /* "pascal program text", */
    "message/rfc822",        /* "mail text", */
    "message/news",          /* "news text", */
    "application/binary",    /* "can't happen error on names.h/types", */
    0
};

/*
 * Keyword table: each entry pairs a token with the L_* index of the text
 * type it suggests.  The table is terminated by a { NULL, 0 } entry, which
 * NNAMES below excludes from the count.
 */
static struct names {
    char *name;
    short type;
} names[] = {

    /* These must be sorted by eye for optimal hit rate */
    /* Add to this list only after substantial meditation */
    {
        "<html>", L_HTML
    },
    {
        "<HTML>", L_HTML
    },
    {
        "<head>", L_HTML
    },
    {
        "<HEAD>", L_HTML
    },
    {
        "<title>", L_HTML
    },
    {
        "<TITLE>", L_HTML
    },
    {
        "<h1>", L_HTML
    },
    {
        "<H1>", L_HTML
    },
    {
        "<!--", L_HTML
    },
    {
        "<!DOCTYPE HTML", L_HTML
    },
    {
        "/*", L_C
    },                /* must precede "The", "the", etc. */
    {
        "#include", L_C
    },
    {
        "char", L_C
    },
    {
        "The", L_ENG
    },
    {
        "the", L_ENG
    },
    {
        "double", L_C
    },
    {
        "extern", L_C
    },
    {
        "float", L_C
    },
    {
        "real", L_C
    },
    {
        "struct", L_C
    },
    {
        "union", L_C
    },
    {
        "CFLAGS", L_MAKE
    },
    {
        "LDFLAGS", L_MAKE
    },
    {
        "all:", L_MAKE
    },
    {
        ".PRECIOUS", L_MAKE
    },
    /*
     * Too many files of text have these words in them.  Find another way to
     * recognize Fortrash.
     */
#ifdef NOTDEF
    {
        "subroutine", L_FORT
    },
    {
        "function", L_FORT
    },
    {
        "block", L_FORT
    },
    {
        "common", L_FORT
    },
    {
        "dimension", L_FORT
    },
    {
        "integer", L_FORT
    },
    {
        "data", L_FORT
    },
#endif /* NOTDEF */
    {
        ".ascii", L_MACH
    },
    {
        ".asciiz", L_MACH
    },
    {
        ".byte", L_MACH
    },
    {
        ".even", L_MACH
    },
    {
        ".globl", L_MACH
    },
    {
        "clr", L_MACH
    },
    {
        "(input,", L_PAS
    },
    {
        "dcl", L_PLI
    },
    {
        "Received:", L_MAIL
    },
    {
        ">From", L_MAIL
    },
    {
        "Return-Path:", L_MAIL
    },
    {
        "Cc:", L_MAIL
    },
    {
        "Newsgroups:", L_NEWS
    },
    {
        "Path:", L_NEWS
    },
    {
        "Organization:", L_NEWS
    },
    {
        NULL, 0
    }
};

/* number of real entries in names[], not counting the terminator */
#define NNAMES ((sizeof(names)/sizeof(struct names)) - 1)

/*
 * Result String List (RSL)
 *
 * The file(1) command prints its output.  Instead, we store the various
 * "printed" strings in a list (allocating memory as we go) and concatenate
 * them at the end when we finally know how much space they'll need.
443 */ 444 445typedef struct magic_rsl_s { 446 char *str; /* string, possibly a fragment */ 447 struct magic_rsl_s *next; /* pointer to next fragment */ 448} magic_rsl; 449 450/* 451 * Apache module configuration structures 452 */ 453 454/* per-server info */ 455typedef struct { 456 const char *magicfile; /* where magic be found */ 457 struct magic *magic; /* head of magic config list */ 458 struct magic *last; 459} magic_server_config_rec; 460 461/* per-request info */ 462typedef struct { 463 magic_rsl *head; /* result string list */ 464 magic_rsl *tail; 465 unsigned suf_recursion; /* recursion depth in suffix check */ 466} magic_req_rec; 467 468/* 469 * configuration functions - called by Apache API routines 470 */ 471 472module AP_MODULE_DECLARE_DATA mime_magic_module; 473 474static void *create_magic_server_config(apr_pool_t *p, server_rec *d) 475{ 476 /* allocate the config - use pcalloc because it needs to be zeroed */ 477 return apr_pcalloc(p, sizeof(magic_server_config_rec)); 478} 479 480static void *merge_magic_server_config(apr_pool_t *p, void *basev, void *addv) 481{ 482 magic_server_config_rec *base = (magic_server_config_rec *) basev; 483 magic_server_config_rec *add = (magic_server_config_rec *) addv; 484 magic_server_config_rec *new = (magic_server_config_rec *) 485 apr_palloc(p, sizeof(magic_server_config_rec)); 486 487 new->magicfile = add->magicfile ? 
add->magicfile : base->magicfile; 488 new->magic = NULL; 489 new->last = NULL; 490 return new; 491} 492 493static const char *set_magicfile(cmd_parms *cmd, void *dummy, const char *arg) 494{ 495 magic_server_config_rec *conf = (magic_server_config_rec *) 496 ap_get_module_config(cmd->server->module_config, 497 &mime_magic_module); 498 499 if (!conf) { 500 return MODNAME ": server structure not allocated"; 501 } 502 conf->magicfile = arg; 503 return NULL; 504} 505 506/* 507 * configuration file commands - exported to Apache API 508 */ 509 510static const command_rec mime_magic_cmds[] = 511{ 512 AP_INIT_TAKE1("MimeMagicFile", set_magicfile, NULL, RSRC_CONF, 513 "Path to MIME Magic file (in file(1) format)"), 514 {NULL} 515}; 516 517/* 518 * RSL (result string list) processing routines 519 * 520 * These collect strings that would have been printed in fragments by file(1) 521 * into a list of magic_rsl structures with the strings. When complete, 522 * they're concatenated together to become the MIME content and encoding 523 * types. 
524 * 525 * return value conventions for these functions: functions which return int: 526 * failure = -1, other = result functions which return pointers: failure = 0, 527 * other = result 528 */ 529 530/* allocate a per-request structure and put it in the request record */ 531static magic_req_rec *magic_set_config(request_rec *r) 532{ 533 magic_req_rec *req_dat = (magic_req_rec *) apr_palloc(r->pool, 534 sizeof(magic_req_rec)); 535 536 req_dat->head = req_dat->tail = (magic_rsl *) NULL; 537 ap_set_module_config(r->request_config, &mime_magic_module, req_dat); 538 return req_dat; 539} 540 541/* add a string to the result string list for this request */ 542/* it is the responsibility of the caller to allocate "str" */ 543static int magic_rsl_add(request_rec *r, char *str) 544{ 545 magic_req_rec *req_dat = (magic_req_rec *) 546 ap_get_module_config(r->request_config, &mime_magic_module); 547 magic_rsl *rsl; 548 549 /* make sure we have a list to put it in */ 550 if (!req_dat) { 551 ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_EINVAL, r, 552 MODNAME ": request config should not be NULL"); 553 if (!(req_dat = magic_set_config(r))) { 554 /* failure */ 555 return -1; 556 } 557 } 558 559 /* allocate the list entry */ 560 rsl = (magic_rsl *) apr_palloc(r->pool, sizeof(magic_rsl)); 561 562 /* fill it */ 563 rsl->str = str; 564 rsl->next = (magic_rsl *) NULL; 565 566 /* append to the list */ 567 if (req_dat->head && req_dat->tail) { 568 req_dat->tail->next = rsl; 569 req_dat->tail = rsl; 570 } 571 else { 572 req_dat->head = req_dat->tail = rsl; 573 } 574 575 /* success */ 576 return 0; 577} 578 579/* RSL hook for puts-type functions */ 580static int magic_rsl_puts(request_rec *r, char *str) 581{ 582 return magic_rsl_add(r, str); 583} 584 585/* RSL hook for printf-type functions */ 586static int magic_rsl_printf(request_rec *r, char *str,...) 
587{ 588 va_list ap; 589 590 char buf[MAXMIMESTRING]; 591 592 /* assemble the string into the buffer */ 593 va_start(ap, str); 594 apr_vsnprintf(buf, sizeof(buf), str, ap); 595 va_end(ap); 596 597 /* add the buffer to the list */ 598 return magic_rsl_add(r, apr_pstrdup(r->pool, buf)); 599} 600 601/* RSL hook for putchar-type functions */ 602static int magic_rsl_putchar(request_rec *r, char c) 603{ 604 char str[2]; 605 606 /* high overhead for 1 char - just hope they don't do this much */ 607 str[0] = c; 608 str[1] = '\0'; 609 return magic_rsl_add(r, str); 610} 611 612/* allocate and copy a contiguous string from a result string list */ 613static char *rsl_strdup(request_rec *r, int start_frag, int start_pos, int len) 614{ 615 char *result; /* return value */ 616 int cur_frag, /* current fragment number/counter */ 617 cur_pos, /* current position within fragment */ 618 res_pos; /* position in result string */ 619 magic_rsl *frag; /* list-traversal pointer */ 620 magic_req_rec *req_dat = (magic_req_rec *) 621 ap_get_module_config(r->request_config, &mime_magic_module); 622 623 /* allocate the result string */ 624 result = (char *) apr_palloc(r->pool, len + 1); 625 626 /* loop through and collect the string */ 627 res_pos = 0; 628 for (frag = req_dat->head, cur_frag = 0; 629 frag->next; 630 frag = frag->next, cur_frag++) { 631 /* loop to the first fragment */ 632 if (cur_frag < start_frag) 633 continue; 634 635 /* loop through and collect chars */ 636 for (cur_pos = (cur_frag == start_frag) ? 
start_pos : 0; 637 frag->str[cur_pos]; 638 cur_pos++) { 639 if (cur_frag >= start_frag 640 && cur_pos >= start_pos 641 && res_pos <= len) { 642 result[res_pos++] = frag->str[cur_pos]; 643 if (res_pos > len) { 644 break; 645 } 646 } 647 } 648 } 649 650 /* clean up and return */ 651 result[res_pos] = 0; 652#if MIME_MAGIC_DEBUG 653 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 654 MODNAME ": rsl_strdup() %d chars: %s", res_pos - 1, result); 655#endif 656 return result; 657} 658 659/* states for the state-machine algorithm in magic_rsl_to_request() */ 660typedef enum { 661 rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding 662} rsl_states; 663 664/* process the RSL and set the MIME info in the request record */ 665static int magic_rsl_to_request(request_rec *r) 666{ 667 int cur_frag, /* current fragment number/counter */ 668 cur_pos, /* current position within fragment */ 669 type_frag, /* content type starting point: fragment */ 670 type_pos, /* content type starting point: position */ 671 type_len, /* content type length */ 672 encoding_frag, /* content encoding starting point: fragment */ 673 encoding_pos, /* content encoding starting point: position */ 674 encoding_len; /* content encoding length */ 675 676 magic_rsl *frag; /* list-traversal pointer */ 677 rsl_states state; 678 679 magic_req_rec *req_dat = (magic_req_rec *) 680 ap_get_module_config(r->request_config, &mime_magic_module); 681 682 /* check if we have a result */ 683 if (!req_dat || !req_dat->head) { 684 /* empty - no match, we defer to other Apache modules */ 685 return DECLINED; 686 } 687 688 /* start searching for the type and encoding */ 689 state = rsl_leading_space; 690 type_frag = type_pos = type_len = 0; 691 encoding_frag = encoding_pos = encoding_len = 0; 692 for (frag = req_dat->head, cur_frag = 0; 693 frag && frag->next; 694 frag = frag->next, cur_frag++) { 695 /* loop through the characters in the fragment */ 696 for (cur_pos = 0; frag->str[cur_pos]; cur_pos++) { 697 if 
(apr_isspace(frag->str[cur_pos])) { 698 /* process whitespace actions for each state */ 699 if (state == rsl_leading_space) { 700 /* eat whitespace in this state */ 701 continue; 702 } 703 else if (state == rsl_type) { 704 /* whitespace: type has no slash! */ 705 return DECLINED; 706 } 707 else if (state == rsl_subtype) { 708 /* whitespace: end of MIME type */ 709 state++; 710 continue; 711 } 712 else if (state == rsl_separator) { 713 /* eat whitespace in this state */ 714 continue; 715 } 716 else if (state == rsl_encoding) { 717 /* whitespace: end of MIME encoding */ 718 /* we're done */ 719 frag = req_dat->tail; 720 break; 721 } 722 else { 723 /* should not be possible */ 724 /* abandon malfunctioning module */ 725 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 726 MODNAME ": bad state %d (ws)", state); 727 return DECLINED; 728 } 729 /* NOTREACHED */ 730 } 731 else if (state == rsl_type && 732 frag->str[cur_pos] == '/') { 733 /* copy the char and go to rsl_subtype state */ 734 type_len++; 735 state++; 736 } 737 else { 738 /* process non-space actions for each state */ 739 if (state == rsl_leading_space) { 740 /* non-space: begin MIME type */ 741 state++; 742 type_frag = cur_frag; 743 type_pos = cur_pos; 744 type_len = 1; 745 continue; 746 } 747 else if (state == rsl_type || 748 state == rsl_subtype) { 749 /* non-space: adds to type */ 750 type_len++; 751 continue; 752 } 753 else if (state == rsl_separator) { 754 /* non-space: begin MIME encoding */ 755 state++; 756 encoding_frag = cur_frag; 757 encoding_pos = cur_pos; 758 encoding_len = 1; 759 continue; 760 } 761 else if (state == rsl_encoding) { 762 /* non-space: adds to encoding */ 763 encoding_len++; 764 continue; 765 } 766 else { 767 /* should not be possible */ 768 /* abandon malfunctioning module */ 769 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 770 MODNAME ": bad state %d (ns)", state); 771 return DECLINED; 772 } 773 /* NOTREACHED */ 774 } 775 /* NOTREACHED */ 776 } 777 } 778 779 /* if we ended prior to state 
rsl_subtype, we had incomplete info */ 780 if (state != rsl_subtype && state != rsl_separator && 781 state != rsl_encoding) { 782 /* defer to other modules */ 783 return DECLINED; 784 } 785 786 /* save the info in the request record */ 787 if (state == rsl_subtype || state == rsl_encoding || 788 state == rsl_encoding) { 789 char *tmp; 790 tmp = rsl_strdup(r, type_frag, type_pos, type_len); 791 /* XXX: this could be done at config time I'm sure... but I'm 792 * confused by all this magic_rsl stuff. -djg */ 793 ap_content_type_tolower(tmp); 794 ap_set_content_type(r, tmp); 795 } 796 if (state == rsl_encoding) { 797 char *tmp; 798 tmp = rsl_strdup(r, encoding_frag, 799 encoding_pos, encoding_len); 800 /* XXX: this could be done at config time I'm sure... but I'm 801 * confused by all this magic_rsl stuff. -djg */ 802 ap_str_tolower(tmp); 803 r->content_encoding = tmp; 804 } 805 806 /* detect memory allocation or other errors */ 807 if (!r->content_type || 808 (state == rsl_encoding && !r->content_encoding)) { 809 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 810 MODNAME ": unexpected state %d; could be caused by bad " 811 "data in magic file", 812 state); 813 return HTTP_INTERNAL_SERVER_ERROR; 814 } 815 816 /* success! */ 817 return OK; 818} 819 820/* 821 * magic_process - process input file r Apache API request record 822 * (formerly called "process" in file command, prefix added for clarity) Opens 823 * the file and reads a fixed-size buffer to begin processing the contents. 
824 */ 825static int magic_process(request_rec *r) 826{ 827 apr_file_t *fd = NULL; 828 unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */ 829 apr_size_t nbytes = 0; /* number of bytes read from a datafile */ 830 int result; 831 832 /* 833 * first try judging the file based on its filesystem status 834 */ 835 switch ((result = fsmagic(r, r->filename))) { 836 case DONE: 837 magic_rsl_putchar(r, '\n'); 838 return OK; 839 case OK: 840 break; 841 default: 842 /* fatal error, bail out */ 843 return result; 844 } 845 846 if (apr_file_open(&fd, r->filename, APR_READ, APR_OS_DEFAULT, r->pool) != APR_SUCCESS) { 847 /* We can't open it, but we were able to stat it. */ 848 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 849 MODNAME ": can't read `%s'", r->filename); 850 /* let some other handler decide what the problem is */ 851 return DECLINED; 852 } 853 854 /* 855 * try looking at the first HOWMANY bytes 856 */ 857 nbytes = sizeof(buf) - 1; 858 if ((result = apr_file_read(fd, (char *) buf, &nbytes)) != APR_SUCCESS) { 859 ap_log_rerror(APLOG_MARK, APLOG_ERR, result, r, 860 MODNAME ": read failed: %s", r->filename); 861 return HTTP_INTERNAL_SERVER_ERROR; 862 } 863 864 if (nbytes == 0) { 865 return DECLINED; 866 } 867 else { 868 buf[nbytes++] = '\0'; /* null-terminate it */ 869 result = tryit(r, buf, nbytes, 1); 870 if (result != OK) { 871 return result; 872 } 873 } 874 875 (void) apr_file_close(fd); 876 (void) magic_rsl_putchar(r, '\n'); 877 878 return OK; 879} 880 881 882static int tryit(request_rec *r, unsigned char *buf, apr_size_t nb, 883 int checkzmagic) 884{ 885 /* 886 * Try compression stuff 887 */ 888 if (checkzmagic == 1) { 889 if (zmagic(r, buf, nb) == 1) 890 return OK; 891 } 892 893 /* 894 * try tests in /etc/magic (or surrogate magic file) 895 */ 896 if (softmagic(r, buf, nb) == 1) 897 return OK; 898 899 /* 900 * try known keywords, check for ascii-ness too. 
901 */ 902 if (ascmagic(r, buf, nb) == 1) 903 return OK; 904 905 /* 906 * abandon hope, all ye who remain here 907 */ 908 return DECLINED; 909} 910 911#define EATAB {while (apr_isspace(*l)) ++l;} 912 913/* 914 * apprentice - load configuration from the magic file r 915 * API request record 916 */ 917static int apprentice(server_rec *s, apr_pool_t *p) 918{ 919 apr_file_t *f = NULL; 920 apr_status_t result; 921 char line[BUFSIZ + 1]; 922 int errs = 0; 923 int lineno; 924#if MIME_MAGIC_DEBUG 925 int rule = 0; 926 struct magic *m, *prevm; 927#endif 928 magic_server_config_rec *conf = (magic_server_config_rec *) 929 ap_get_module_config(s->module_config, &mime_magic_module); 930 const char *fname = ap_server_root_relative(p, conf->magicfile); 931 932 if (!fname) { 933 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EBADPATH, s, 934 MODNAME ": Invalid magic file path %s", conf->magicfile); 935 return -1; 936 } 937 if ((result = apr_file_open(&f, fname, APR_READ | APR_BUFFERED, 938 APR_OS_DEFAULT, p)) != APR_SUCCESS) { 939 ap_log_error(APLOG_MARK, APLOG_ERR, result, s, 940 MODNAME ": can't read magic file %s", fname); 941 return -1; 942 } 943 944 /* set up the magic list (empty) */ 945 conf->magic = conf->last = NULL; 946 947 /* parse it */ 948 for (lineno = 1; apr_file_gets(line, BUFSIZ, f) == APR_SUCCESS; lineno++) { 949 int ws_offset; 950 char *last = line + strlen(line) - 1; /* guaranteed that len >= 1 since an 951 * "empty" line contains a '\n' 952 */ 953 954 /* delete newline and any other trailing whitespace */ 955 while (last >= line 956 && apr_isspace(*last)) { 957 *last = '\0'; 958 --last; 959 } 960 961 /* skip leading whitespace */ 962 ws_offset = 0; 963 while (line[ws_offset] && apr_isspace(line[ws_offset])) { 964 ws_offset++; 965 } 966 967 /* skip blank lines */ 968 if (line[ws_offset] == 0) { 969 continue; 970 } 971 972 /* comment, do not parse */ 973 if (line[ws_offset] == '#') 974 continue; 975 976#if MIME_MAGIC_DEBUG 977 /* if we get here, we're going to use it 
so count it */ 978 rule++; 979#endif 980 981 /* parse it */ 982 if (parse(s, p, line + ws_offset, lineno) != 0) 983 ++errs; 984 } 985 986 (void) apr_file_close(f); 987 988#if MIME_MAGIC_DEBUG 989 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, 990 MODNAME ": apprentice conf=%x file=%s m=%s m->next=%s last=%s", 991 conf, 992 conf->magicfile ? conf->magicfile : "NULL", 993 conf->magic ? "set" : "NULL", 994 (conf->magic && conf->magic->next) ? "set" : "NULL", 995 conf->last ? "set" : "NULL"); 996 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, 997 MODNAME ": apprentice read %d lines, %d rules, %d errors", 998 lineno, rule, errs); 999#endif 1000 1001#if MIME_MAGIC_DEBUG 1002 prevm = 0; 1003 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, 1004 MODNAME ": apprentice test"); 1005 for (m = conf->magic; m; m = m->next) { 1006 if (apr_isprint((((unsigned long) m) >> 24) & 255) && 1007 apr_isprint((((unsigned long) m) >> 16) & 255) && 1008 apr_isprint((((unsigned long) m) >> 8) & 255) && 1009 apr_isprint(((unsigned long) m) & 255)) { 1010 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, 1011 MODNAME ": apprentice: POINTER CLOBBERED! " 1012 "m=\"%c%c%c%c\" line=%d", 1013 (((unsigned long) m) >> 24) & 255, 1014 (((unsigned long) m) >> 16) & 255, 1015 (((unsigned long) m) >> 8) & 255, 1016 ((unsigned long) m) & 255, 1017 prevm ? prevm->lineno : -1); 1018 break; 1019 } 1020 prevm = m; 1021 } 1022#endif 1023 1024 return (errs ? -1 : 0); 1025} 1026 1027/* 1028 * extend the sign bit if the comparison is to be signed 1029 */ 1030static unsigned long signextend(server_rec *s, struct magic *m, unsigned long v) 1031{ 1032 if (!(m->flag & UNSIGNED)) 1033 switch (m->type) { 1034 /* 1035 * Do not remove the casts below. They are vital. When later 1036 * compared with the data, the sign extension must have happened. 
1037 */ 1038 case BYTE: 1039 v = (char) v; 1040 break; 1041 case SHORT: 1042 case BESHORT: 1043 case LESHORT: 1044 v = (short) v; 1045 break; 1046 case DATE: 1047 case BEDATE: 1048 case LEDATE: 1049 case LONG: 1050 case BELONG: 1051 case LELONG: 1052 v = (long) v; 1053 break; 1054 case STRING: 1055 break; 1056 default: 1057 ap_log_error(APLOG_MARK, APLOG_ERR, 0, s, 1058 MODNAME ": can't happen: m->type=%d", m->type); 1059 return -1; 1060 } 1061 return v; 1062} 1063 1064/* 1065 * parse one line from magic file, put into magic[index++] if valid 1066 */ 1067static int parse(server_rec *serv, apr_pool_t *p, char *l, int lineno) 1068{ 1069 struct magic *m; 1070 char *t, *s; 1071 magic_server_config_rec *conf = (magic_server_config_rec *) 1072 ap_get_module_config(serv->module_config, &mime_magic_module); 1073 1074 /* allocate magic structure entry */ 1075 m = (struct magic *) apr_pcalloc(p, sizeof(struct magic)); 1076 1077 /* append to linked list */ 1078 m->next = NULL; 1079 if (!conf->magic || !conf->last) { 1080 conf->magic = conf->last = m; 1081 } 1082 else { 1083 conf->last->next = m; 1084 conf->last = m; 1085 } 1086 1087 /* set values in magic structure */ 1088 m->flag = 0; 1089 m->cont_level = 0; 1090 m->lineno = lineno; 1091 1092 while (*l == '>') { 1093 ++l; /* step over */ 1094 m->cont_level++; 1095 } 1096 1097 if (m->cont_level != 0 && *l == '(') { 1098 ++l; /* step over */ 1099 m->flag |= INDIR; 1100 } 1101 1102 /* get offset, then skip over it */ 1103 m->offset = (int) strtol(l, &t, 0); 1104 if (l == t) { 1105 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, 1106 MODNAME ": offset %s invalid", l); 1107 } 1108 l = t; 1109 1110 if (m->flag & INDIR) { 1111 m->in.type = LONG; 1112 m->in.offset = 0; 1113 /* 1114 * read [.lbs][+-]nnnnn) 1115 */ 1116 if (*l == '.') { 1117 switch (*++l) { 1118 case 'l': 1119 m->in.type = LONG; 1120 break; 1121 case 's': 1122 m->in.type = SHORT; 1123 break; 1124 case 'b': 1125 m->in.type = BYTE; 1126 break; 1127 default: 1128 
ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, 1129 MODNAME ": indirect offset type %c invalid", *l); 1130 break; 1131 } 1132 l++; 1133 } 1134 s = l; 1135 if (*l == '+' || *l == '-') 1136 l++; 1137 if (apr_isdigit((unsigned char) *l)) { 1138 m->in.offset = strtol(l, &t, 0); 1139 if (*s == '-') 1140 m->in.offset = -m->in.offset; 1141 } 1142 else 1143 t = l; 1144 if (*t++ != ')') { 1145 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, 1146 MODNAME ": missing ')' in indirect offset"); 1147 } 1148 l = t; 1149 } 1150 1151 1152 while (apr_isdigit((unsigned char) *l)) 1153 ++l; 1154 EATAB; 1155 1156#define NBYTE 4 1157#define NSHORT 5 1158#define NLONG 4 1159#define NSTRING 6 1160#define NDATE 4 1161#define NBESHORT 7 1162#define NBELONG 6 1163#define NBEDATE 6 1164#define NLESHORT 7 1165#define NLELONG 6 1166#define NLEDATE 6 1167 1168 if (*l == 'u') { 1169 ++l; 1170 m->flag |= UNSIGNED; 1171 } 1172 1173 /* get type, skip it */ 1174 if (strncmp(l, "byte", NBYTE) == 0) { 1175 m->type = BYTE; 1176 l += NBYTE; 1177 } 1178 else if (strncmp(l, "short", NSHORT) == 0) { 1179 m->type = SHORT; 1180 l += NSHORT; 1181 } 1182 else if (strncmp(l, "long", NLONG) == 0) { 1183 m->type = LONG; 1184 l += NLONG; 1185 } 1186 else if (strncmp(l, "string", NSTRING) == 0) { 1187 m->type = STRING; 1188 l += NSTRING; 1189 } 1190 else if (strncmp(l, "date", NDATE) == 0) { 1191 m->type = DATE; 1192 l += NDATE; 1193 } 1194 else if (strncmp(l, "beshort", NBESHORT) == 0) { 1195 m->type = BESHORT; 1196 l += NBESHORT; 1197 } 1198 else if (strncmp(l, "belong", NBELONG) == 0) { 1199 m->type = BELONG; 1200 l += NBELONG; 1201 } 1202 else if (strncmp(l, "bedate", NBEDATE) == 0) { 1203 m->type = BEDATE; 1204 l += NBEDATE; 1205 } 1206 else if (strncmp(l, "leshort", NLESHORT) == 0) { 1207 m->type = LESHORT; 1208 l += NLESHORT; 1209 } 1210 else if (strncmp(l, "lelong", NLELONG) == 0) { 1211 m->type = LELONG; 1212 l += NLELONG; 1213 } 1214 else if (strncmp(l, "ledate", NLEDATE) == 0) { 1215 m->type = LEDATE; 1216 l += 
NLEDATE; 1217 } 1218 else { 1219 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, 1220 MODNAME ": type %s invalid", l); 1221 return -1; 1222 } 1223 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 1224 if (*l == '&') { 1225 ++l; 1226 m->mask = signextend(serv, m, strtol(l, &l, 0)); 1227 } 1228 else 1229 m->mask = ~0L; 1230 EATAB; 1231 1232 switch (*l) { 1233 case '>': 1234 case '<': 1235 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 1236 case '&': 1237 case '^': 1238 case '=': 1239 m->reln = *l; 1240 ++l; 1241 break; 1242 case '!': 1243 if (m->type != STRING) { 1244 m->reln = *l; 1245 ++l; 1246 break; 1247 } 1248 /* FALL THROUGH */ 1249 default: 1250 if (*l == 'x' && apr_isspace(l[1])) { 1251 m->reln = *l; 1252 ++l; 1253 goto GetDesc; /* Bill The Cat */ 1254 } 1255 m->reln = '='; 1256 break; 1257 } 1258 EATAB; 1259 1260 if (getvalue(serv, m, &l)) 1261 return -1; 1262 /* 1263 * now get last part - the description 1264 */ 1265 GetDesc: 1266 EATAB; 1267 if (l[0] == '\b') { 1268 ++l; 1269 m->nospflag = 1; 1270 } 1271 else if ((l[0] == '\\') && (l[1] == 'b')) { 1272 ++l; 1273 ++l; 1274 m->nospflag = 1; 1275 } 1276 else 1277 m->nospflag = 0; 1278 strncpy(m->desc, l, sizeof(m->desc) - 1); 1279 m->desc[sizeof(m->desc) - 1] = '\0'; 1280 1281#if MIME_MAGIC_DEBUG 1282 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, serv, 1283 MODNAME ": parse line=%d m=%x next=%x cont=%d desc=%s", 1284 lineno, m, m->next, m->cont_level, m->desc); 1285#endif /* MIME_MAGIC_DEBUG */ 1286 1287 return 0; 1288} 1289 1290/* 1291 * Read a numeric value from a pointer, into the value union of a magic 1292 * pointer, according to the magic type. Update the string pointer to point 1293 * just after the number read. Return 0 for success, non-zero for failure. 
1294 */ 1295static int getvalue(server_rec *s, struct magic *m, char **p) 1296{ 1297 int slen; 1298 1299 if (m->type == STRING) { 1300 *p = getstr(s, *p, m->value.s, sizeof(m->value.s), &slen); 1301 m->vallen = slen; 1302 } 1303 else if (m->reln != 'x') 1304 m->value.l = signextend(s, m, strtol(*p, p, 0)); 1305 return 0; 1306} 1307 1308/* 1309 * Convert a string containing C character escapes. Stop at an unescaped 1310 * space or tab. Copy the converted version to "p", returning its length in 1311 * *slen. Return updated scan pointer as function result. 1312 */ 1313static char *getstr(server_rec *serv, register char *s, register char *p, 1314 int plen, int *slen) 1315{ 1316 char *origs = s, *origp = p; 1317 char *pmax = p + plen - 1; 1318 register int c; 1319 register int val; 1320 1321 while ((c = *s++) != '\0') { 1322 if (apr_isspace(c)) 1323 break; 1324 if (p >= pmax) { 1325 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, 1326 MODNAME ": string too long: %s", origs); 1327 break; 1328 } 1329 if (c == '\\') { 1330 switch (c = *s++) { 1331 1332 case '\0': 1333 goto out; 1334 1335 default: 1336 *p++ = (char) c; 1337 break; 1338 1339 case 'n': 1340 *p++ = '\n'; 1341 break; 1342 1343 case 'r': 1344 *p++ = '\r'; 1345 break; 1346 1347 case 'b': 1348 *p++ = '\b'; 1349 break; 1350 1351 case 't': 1352 *p++ = '\t'; 1353 break; 1354 1355 case 'f': 1356 *p++ = '\f'; 1357 break; 1358 1359 case 'v': 1360 *p++ = '\v'; 1361 break; 1362 1363 /* \ and up to 3 octal digits */ 1364 case '0': 1365 case '1': 1366 case '2': 1367 case '3': 1368 case '4': 1369 case '5': 1370 case '6': 1371 case '7': 1372 val = c - '0'; 1373 c = *s++; /* try for 2 */ 1374 if (c >= '0' && c <= '7') { 1375 val = (val << 3) | (c - '0'); 1376 c = *s++; /* try for 3 */ 1377 if (c >= '0' && c <= '7') 1378 val = (val << 3) | (c - '0'); 1379 else 1380 --s; 1381 } 1382 else 1383 --s; 1384 *p++ = (char) val; 1385 break; 1386 1387 /* \x and up to 3 hex digits */ 1388 case 'x': 1389 val = 'x'; /* Default if no digits */ 
1390 c = hextoint(*s++); /* Get next char */ 1391 if (c >= 0) { 1392 val = c; 1393 c = hextoint(*s++); 1394 if (c >= 0) { 1395 val = (val << 4) + c; 1396 c = hextoint(*s++); 1397 if (c >= 0) { 1398 val = (val << 4) + c; 1399 } 1400 else 1401 --s; 1402 } 1403 else 1404 --s; 1405 } 1406 else 1407 --s; 1408 *p++ = (char) val; 1409 break; 1410 } 1411 } 1412 else 1413 *p++ = (char) c; 1414 } 1415 out: 1416 *p = '\0'; 1417 *slen = p - origp; 1418 return s; 1419} 1420 1421 1422/* Single hex char to int; -1 if not a hex char. */ 1423static int hextoint(int c) 1424{ 1425 if (apr_isdigit(c)) 1426 return c - '0'; 1427 if ((c >= 'a') && (c <= 'f')) 1428 return c + 10 - 'a'; 1429 if ((c >= 'A') && (c <= 'F')) 1430 return c + 10 - 'A'; 1431 return -1; 1432} 1433 1434 1435/* 1436 * return DONE to indicate it's been handled 1437 * return OK to indicate it's a regular file still needing handling 1438 * other returns indicate a failure of some sort 1439 */ 1440static int fsmagic(request_rec *r, const char *fn) 1441{ 1442 switch (r->finfo.filetype) { 1443 case APR_DIR: 1444 magic_rsl_puts(r, DIR_MAGIC_TYPE); 1445 return DONE; 1446 case APR_CHR: 1447 /* 1448 * (void) magic_rsl_printf(r,"character special (%d/%d)", 1449 * major(sb->st_rdev), minor(sb->st_rdev)); 1450 */ 1451 (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN); 1452 return DONE; 1453 case APR_BLK: 1454 /* 1455 * (void) magic_rsl_printf(r,"block special (%d/%d)", 1456 * major(sb->st_rdev), minor(sb->st_rdev)); 1457 */ 1458 (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN); 1459 return DONE; 1460 /* TODO add code to handle V7 MUX and Blit MUX files */ 1461 case APR_PIPE: 1462 /* 1463 * magic_rsl_puts(r,"fifo (named pipe)"); 1464 */ 1465 (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN); 1466 return DONE; 1467 case APR_LNK: 1468 /* We used stat(), the only possible reason for this is that the 1469 * symlink is broken. 
1470 */ 1471 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 1472 MODNAME ": broken symlink (%s)", fn); 1473 return HTTP_INTERNAL_SERVER_ERROR; 1474 case APR_SOCK: 1475 magic_rsl_puts(r, MIME_BINARY_UNKNOWN); 1476 return DONE; 1477 case APR_REG: 1478 break; 1479 default: 1480 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 1481 MODNAME ": invalid file type %d.", r->finfo.filetype); 1482 return HTTP_INTERNAL_SERVER_ERROR; 1483 } 1484 1485 /* 1486 * regular file, check next possibility 1487 */ 1488 if (r->finfo.size == 0) { 1489 magic_rsl_puts(r, MIME_TEXT_UNKNOWN); 1490 return DONE; 1491 } 1492 return OK; 1493} 1494 1495/* 1496 * softmagic - lookup one file in database (already read from /etc/magic by 1497 * apprentice.c). Passed the name and FILE * of one file to be typed. 1498 */ 1499 /* ARGSUSED1 *//* nbytes passed for regularity, maybe need later */ 1500static int softmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes) 1501{ 1502 if (match(r, buf, nbytes)) 1503 return 1; 1504 1505 return 0; 1506} 1507 1508/* 1509 * Go through the whole list, stopping if you find a match. Process all the 1510 * continuations of that match before returning. 1511 * 1512 * We support multi-level continuations: 1513 * 1514 * At any time when processing a successful top-level match, there is a current 1515 * continuation level; it represents the level of the last successfully 1516 * matched continuation. 1517 * 1518 * Continuations above that level are skipped as, if we see one, it means that 1519 * the continuation that controls them - i.e, the lower-level continuation 1520 * preceding them - failed to match. 1521 * 1522 * Continuations below that level are processed as, if we see one, it means 1523 * we've finished processing or skipping higher-level continuations under the 1524 * control of a successful or unsuccessful lower-level continuation, and are 1525 * now seeing the next lower-level continuation and should process it. 
The 1526 * current continuation level reverts to the level of the one we're seeing. 1527 * 1528 * Continuations at the current level are processed as, if we see one, there's 1529 * no lower-level continuation that may have failed. 1530 * 1531 * If a continuation matches, we bump the current continuation level so that 1532 * higher-level continuations are processed. 1533 */ 1534static int match(request_rec *r, unsigned char *s, apr_size_t nbytes) 1535{ 1536#if MIME_MAGIC_DEBUG 1537 int rule_counter = 0; 1538#endif 1539 int cont_level = 0; 1540 int need_separator = 0; 1541 union VALUETYPE p; 1542 magic_server_config_rec *conf = (magic_server_config_rec *) 1543 ap_get_module_config(r->server->module_config, &mime_magic_module); 1544 struct magic *m; 1545 1546#if MIME_MAGIC_DEBUG 1547 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1548 MODNAME ": match conf=%x file=%s m=%s m->next=%s last=%s", 1549 conf, 1550 conf->magicfile ? conf->magicfile : "NULL", 1551 conf->magic ? "set" : "NULL", 1552 (conf->magic && conf->magic->next) ? "set" : "NULL", 1553 conf->last ? "set" : "NULL"); 1554#endif 1555 1556#if MIME_MAGIC_DEBUG 1557 for (m = conf->magic; m; m = m->next) { 1558 if (apr_isprint((((unsigned long) m) >> 24) & 255) && 1559 apr_isprint((((unsigned long) m) >> 16) & 255) && 1560 apr_isprint((((unsigned long) m) >> 8) & 255) && 1561 apr_isprint(((unsigned long) m) & 255)) { 1562 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1563 MODNAME ": match: POINTER CLOBBERED! 
" 1564 "m=\"%c%c%c%c\"", 1565 (((unsigned long) m) >> 24) & 255, 1566 (((unsigned long) m) >> 16) & 255, 1567 (((unsigned long) m) >> 8) & 255, 1568 ((unsigned long) m) & 255); 1569 break; 1570 } 1571 } 1572#endif 1573 1574 for (m = conf->magic; m; m = m->next) { 1575#if MIME_MAGIC_DEBUG 1576 rule_counter++; 1577 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1578 MODNAME ": line=%d desc=%s", m->lineno, m->desc); 1579#endif 1580 1581 /* check if main entry matches */ 1582 if (!mget(r, &p, s, m, nbytes) || 1583 !mcheck(r, &p, m)) { 1584 struct magic *m_cont; 1585 1586 /* 1587 * main entry didn't match, flush its continuations 1588 */ 1589 if (!m->next || (m->next->cont_level == 0)) { 1590 continue; 1591 } 1592 1593 m_cont = m->next; 1594 while (m_cont && (m_cont->cont_level != 0)) { 1595#if MIME_MAGIC_DEBUG 1596 rule_counter++; 1597 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1598 MODNAME ": line=%d mc=%x mc->next=%x cont=%d desc=%s", 1599 m_cont->lineno, m_cont, 1600 m_cont->next, m_cont->cont_level, 1601 m_cont->desc); 1602#endif 1603 /* 1604 * this trick allows us to keep *m in sync when the continue 1605 * advances the pointer 1606 */ 1607 m = m_cont; 1608 m_cont = m_cont->next; 1609 } 1610 continue; 1611 } 1612 1613 /* if we get here, the main entry rule was a match */ 1614 /* this will be the last run through the loop */ 1615#if MIME_MAGIC_DEBUG 1616 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1617 MODNAME ": rule matched, line=%d type=%d %s", 1618 m->lineno, m->type, 1619 (m->type == STRING) ? m->value.s : ""); 1620#endif 1621 1622 /* print the match */ 1623 mprint(r, &p, m); 1624 1625 /* 1626 * If we printed something, we'll need to print a blank before we 1627 * print something else. 
1628 */ 1629 if (m->desc[0]) 1630 need_separator = 1; 1631 /* and any continuations that match */ 1632 cont_level++; 1633 /* 1634 * while (m && m->next && m->next->cont_level != 0 && ( m = m->next 1635 * )) 1636 */ 1637 m = m->next; 1638 while (m && (m->cont_level != 0)) { 1639#if MIME_MAGIC_DEBUG 1640 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1641 MODNAME ": match line=%d cont=%d type=%d %s", 1642 m->lineno, m->cont_level, m->type, 1643 (m->type == STRING) ? m->value.s : ""); 1644#endif 1645 if (cont_level >= m->cont_level) { 1646 if (cont_level > m->cont_level) { 1647 /* 1648 * We're at the end of the level "cont_level" 1649 * continuations. 1650 */ 1651 cont_level = m->cont_level; 1652 } 1653 if (mget(r, &p, s, m, nbytes) && 1654 mcheck(r, &p, m)) { 1655 /* 1656 * This continuation matched. Print its message, with a 1657 * blank before it if the previous item printed and this 1658 * item isn't empty. 1659 */ 1660 /* space if previous printed */ 1661 if (need_separator 1662 && (m->nospflag == 0) 1663 && (m->desc[0] != '\0') 1664 ) { 1665 (void) magic_rsl_putchar(r, ' '); 1666 need_separator = 0; 1667 } 1668 mprint(r, &p, m); 1669 if (m->desc[0]) 1670 need_separator = 1; 1671 1672 /* 1673 * If we see any continuations at a higher level, process 1674 * them. 
1675 */ 1676 cont_level++; 1677 } 1678 } 1679 1680 /* move to next continuation record */ 1681 m = m->next; 1682 } 1683#if MIME_MAGIC_DEBUG 1684 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1685 MODNAME ": matched after %d rules", rule_counter); 1686#endif 1687 return 1; /* all through */ 1688 } 1689#if MIME_MAGIC_DEBUG 1690 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1691 MODNAME ": failed after %d rules", rule_counter); 1692#endif 1693 return 0; /* no match at all */ 1694} 1695 1696static void mprint(request_rec *r, union VALUETYPE *p, struct magic *m) 1697{ 1698 char *pp; 1699 unsigned long v; 1700 char time_str[APR_CTIME_LEN]; 1701 1702 switch (m->type) { 1703 case BYTE: 1704 v = p->b; 1705 break; 1706 1707 case SHORT: 1708 case BESHORT: 1709 case LESHORT: 1710 v = p->h; 1711 break; 1712 1713 case LONG: 1714 case BELONG: 1715 case LELONG: 1716 v = p->l; 1717 break; 1718 1719 case STRING: 1720 if (m->reln == '=') { 1721 (void) magic_rsl_printf(r, m->desc, m->value.s); 1722 } 1723 else { 1724 (void) magic_rsl_printf(r, m->desc, p->s); 1725 } 1726 return; 1727 1728 case DATE: 1729 case BEDATE: 1730 case LEDATE: 1731 apr_ctime(time_str, apr_time_from_sec(*(time_t *)&p->l)); 1732 pp = time_str; 1733 (void) magic_rsl_printf(r, m->desc, pp); 1734 return; 1735 default: 1736 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 1737 MODNAME ": invalid m->type (%d) in mprint().", 1738 m->type); 1739 return; 1740 } 1741 1742 v = signextend(r->server, m, v) & m->mask; 1743 (void) magic_rsl_printf(r, m->desc, (unsigned long) v); 1744} 1745 1746/* 1747 * Convert the byte order of the data we are looking at 1748 */ 1749static int mconvert(request_rec *r, union VALUETYPE *p, struct magic *m) 1750{ 1751 char *rt; 1752 1753 switch (m->type) { 1754 case BYTE: 1755 case SHORT: 1756 case LONG: 1757 case DATE: 1758 return 1; 1759 case STRING: 1760 /* Null terminate and eat the return */ 1761 p->s[sizeof(p->s) - 1] = '\0'; 1762 if ((rt = strchr(p->s, '\n')) != NULL) 1763 *rt = '\0'; 1764 
return 1; 1765 case BESHORT: 1766 p->h = (short) ((p->hs[0] << 8) | (p->hs[1])); 1767 return 1; 1768 case BELONG: 1769 case BEDATE: 1770 p->l = (long) 1771 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3])); 1772 return 1; 1773 case LESHORT: 1774 p->h = (short) ((p->hs[1] << 8) | (p->hs[0])); 1775 return 1; 1776 case LELONG: 1777 case LEDATE: 1778 p->l = (long) 1779 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0])); 1780 return 1; 1781 default: 1782 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 1783 MODNAME ": invalid type %d in mconvert().", m->type); 1784 return 0; 1785 } 1786} 1787 1788 1789static int mget(request_rec *r, union VALUETYPE *p, unsigned char *s, 1790 struct magic *m, apr_size_t nbytes) 1791{ 1792 long offset = m->offset; 1793 1794 if (offset + sizeof(union VALUETYPE) > nbytes) 1795 return 0; 1796 1797 memcpy(p, s + offset, sizeof(union VALUETYPE)); 1798 1799 if (!mconvert(r, p, m)) 1800 return 0; 1801 1802 if (m->flag & INDIR) { 1803 1804 switch (m->in.type) { 1805 case BYTE: 1806 offset = p->b + m->in.offset; 1807 break; 1808 case SHORT: 1809 offset = p->h + m->in.offset; 1810 break; 1811 case LONG: 1812 offset = p->l + m->in.offset; 1813 break; 1814 } 1815 1816 if (offset + sizeof(union VALUETYPE) > nbytes) 1817 return 0; 1818 1819 memcpy(p, s + offset, sizeof(union VALUETYPE)); 1820 1821 if (!mconvert(r, p, m)) 1822 return 0; 1823 } 1824 return 1; 1825} 1826 1827static int mcheck(request_rec *r, union VALUETYPE *p, struct magic *m) 1828{ 1829 register unsigned long l = m->value.l; 1830 register unsigned long v; 1831 int matched; 1832 1833 if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) { 1834 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 1835 MODNAME ": BOINK"); 1836 return 1; 1837 } 1838 1839 switch (m->type) { 1840 case BYTE: 1841 v = p->b; 1842 break; 1843 1844 case SHORT: 1845 case BESHORT: 1846 case LESHORT: 1847 v = p->h; 1848 break; 1849 1850 case LONG: 1851 case BELONG: 1852 case LELONG: 1853 case 
DATE: 1854 case BEDATE: 1855 case LEDATE: 1856 v = p->l; 1857 break; 1858 1859 case STRING: 1860 l = 0; 1861 /* 1862 * What we want here is: v = strncmp(m->value.s, p->s, m->vallen); 1863 * but ignoring any nulls. bcmp doesn't give -/+/0 and isn't 1864 * universally available anyway. 1865 */ 1866 v = 0; 1867 { 1868 register unsigned char *a = (unsigned char *) m->value.s; 1869 register unsigned char *b = (unsigned char *) p->s; 1870 register int len = m->vallen; 1871 1872 while (--len >= 0) 1873 if ((v = *b++ - *a++) != 0) 1874 break; 1875 } 1876 break; 1877 default: 1878 /* bogosity, pretend that it just wasn't a match */ 1879 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 1880 MODNAME ": invalid type %d in mcheck().", m->type); 1881 return 0; 1882 } 1883 1884 v = signextend(r->server, m, v) & m->mask; 1885 1886 switch (m->reln) { 1887 case 'x': 1888#if MIME_MAGIC_DEBUG 1889 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1890 "%lu == *any* = 1", v); 1891#endif 1892 matched = 1; 1893 break; 1894 1895 case '!': 1896 matched = v != l; 1897#if MIME_MAGIC_DEBUG 1898 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1899 "%lu != %lu = %d", v, l, matched); 1900#endif 1901 break; 1902 1903 case '=': 1904 matched = v == l; 1905#if MIME_MAGIC_DEBUG 1906 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1907 "%lu == %lu = %d", v, l, matched); 1908#endif 1909 break; 1910 1911 case '>': 1912 if (m->flag & UNSIGNED) { 1913 matched = v > l; 1914#if MIME_MAGIC_DEBUG 1915 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1916 "%lu > %lu = %d", v, l, matched); 1917#endif 1918 } 1919 else { 1920 matched = (long) v > (long) l; 1921#if MIME_MAGIC_DEBUG 1922 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1923 "%ld > %ld = %d", v, l, matched); 1924#endif 1925 } 1926 break; 1927 1928 case '<': 1929 if (m->flag & UNSIGNED) { 1930 matched = v < l; 1931#if MIME_MAGIC_DEBUG 1932 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1933 "%lu < %lu = %d", v, l, matched); 1934#endif 1935 } 1936 else { 1937 matched = (long) v 
< (long) l; 1938#if MIME_MAGIC_DEBUG 1939 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1940 "%ld < %ld = %d", v, l, matched); 1941#endif 1942 } 1943 break; 1944 1945 case '&': 1946 matched = (v & l) == l; 1947#if MIME_MAGIC_DEBUG 1948 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1949 "((%lx & %lx) == %lx) = %d", v, l, l, matched); 1950#endif 1951 break; 1952 1953 case '^': 1954 matched = (v & l) != l; 1955#if MIME_MAGIC_DEBUG 1956 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 1957 "((%lx & %lx) != %lx) = %d", v, l, l, matched); 1958#endif 1959 break; 1960 1961 default: 1962 /* bogosity, pretend it didn't match */ 1963 matched = 0; 1964 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 1965 MODNAME ": mcheck: can't happen: invalid relation %d.", 1966 m->reln); 1967 break; 1968 } 1969 1970 return matched; 1971} 1972 1973/* an optimization over plain strcmp() */ 1974#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0) 1975 1976static int ascmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes) 1977{ 1978 int has_escapes = 0; 1979 unsigned char *s; 1980 char nbuf[HOWMANY + 1]; /* one extra for terminating '\0' */ 1981 char *token; 1982 register struct names *p; 1983 int small_nbytes; 1984 char *strtok_state; 1985 1986 /* these are easy, do them first */ 1987 1988 /* 1989 * for troff, look for . + letter + letter or .\"; this must be done to 1990 * disambiguate tar archives' ./file and other trash from real troff 1991 * input. 
1992 */ 1993 if (*buf == '.') { 1994 unsigned char *tp = buf + 1; 1995 1996 while (apr_isspace(*tp)) 1997 ++tp; /* skip leading whitespace */ 1998 if ((apr_isalnum(*tp) || *tp == '\\') && 1999 (apr_isalnum(*(tp + 1)) || *tp == '"')) { 2000 magic_rsl_puts(r, "application/x-troff"); 2001 return 1; 2002 } 2003 } 2004 if ((*buf == 'c' || *buf == 'C') && apr_isspace(*(buf + 1))) { 2005 /* Fortran */ 2006 magic_rsl_puts(r, "text/plain"); 2007 return 1; 2008 } 2009 2010 /* look for tokens from names.h - this is expensive!, so we'll limit 2011 * ourselves to only SMALL_HOWMANY bytes */ 2012 small_nbytes = (nbytes > SMALL_HOWMANY) ? SMALL_HOWMANY : nbytes; 2013 /* make a copy of the buffer here because apr_strtok() will destroy it */ 2014 s = (unsigned char *) memcpy(nbuf, buf, small_nbytes); 2015 s[small_nbytes] = '\0'; 2016 has_escapes = (memchr(s, '\033', small_nbytes) != NULL); 2017 while ((token = apr_strtok((char *) s, " \t\n\r\f", &strtok_state)) != NULL) { 2018 s = NULL; /* make apr_strtok() keep on tokin' */ 2019 for (p = names; p < names + NNAMES; p++) { 2020 if (STREQ(p->name, token)) { 2021 magic_rsl_puts(r, types[p->type]); 2022 if (has_escapes) 2023 magic_rsl_puts(r, " (with escape sequences)"); 2024 return 1; 2025 } 2026 } 2027 } 2028 2029 switch (is_tar(buf, nbytes)) { 2030 case 1: 2031 /* V7 tar archive */ 2032 magic_rsl_puts(r, "application/x-tar"); 2033 return 1; 2034 case 2: 2035 /* POSIX tar archive */ 2036 magic_rsl_puts(r, "application/x-tar"); 2037 return 1; 2038 } 2039 2040 /* all else fails, but it is ascii... 
*/ 2041 return 0; 2042} 2043 2044 2045/* 2046 * compress routines: zmagic() - returns 0 if not recognized, uncompresses 2047 * and prints information if recognized uncompress(s, method, old, n, newch) 2048 * - uncompress old into new, using method, return sizeof new 2049 */ 2050 2051static struct { 2052 char *magic; 2053 apr_size_t maglen; 2054 char *argv[3]; 2055 int silent; 2056 char *encoding; /* MUST be lowercase */ 2057} compr[] = { 2058 2059 /* we use gzip here rather than uncompress because we have to pass 2060 * it a full filename -- and uncompress only considers filenames 2061 * ending with .Z 2062 */ 2063 { 2064 "\037\235", 2, { 2065 "gzip", "-dcq", NULL 2066 }, 0, "x-compress" 2067 }, 2068 { 2069 "\037\213", 2, { 2070 "gzip", "-dcq", NULL 2071 }, 1, "x-gzip" 2072 }, 2073 /* 2074 * XXX pcat does not work, cause I don't know how to make it read stdin, 2075 * so we use gzip 2076 */ 2077 { 2078 "\037\036", 2, { 2079 "gzip", "-dcq", NULL 2080 }, 0, "x-gzip" 2081 }, 2082}; 2083 2084static int ncompr = sizeof(compr) / sizeof(compr[0]); 2085 2086static int zmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes) 2087{ 2088 unsigned char *newbuf; 2089 int newsize; 2090 int i; 2091 2092 for (i = 0; i < ncompr; i++) { 2093 if (nbytes < compr[i].maglen) 2094 continue; 2095 if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0) 2096 break; 2097 } 2098 2099 if (i == ncompr) 2100 return 0; 2101 2102 if ((newsize = uncompress(r, i, &newbuf, HOWMANY)) > 0) { 2103 /* set encoding type in the request record */ 2104 r->content_encoding = compr[i].encoding; 2105 2106 newbuf[newsize-1] = '\0'; /* null-terminate uncompressed data */ 2107 /* Try to detect the content type of the uncompressed data */ 2108 if (tryit(r, newbuf, newsize, 0) != OK) { 2109 return 0; 2110 } 2111 } 2112 return 1; 2113} 2114 2115 2116struct uncompress_parms { 2117 request_rec *r; 2118 int method; 2119}; 2120 2121static int create_uncompress_child(struct uncompress_parms *parm, apr_pool_t *cntxt, 
2122 apr_file_t **pipe_in) 2123{ 2124 int rc = 1; 2125 const char *new_argv[4]; 2126 request_rec *r = parm->r; 2127 apr_pool_t *child_context = cntxt; 2128 apr_procattr_t *procattr; 2129 apr_proc_t *procnew; 2130 2131 /* XXX missing 1.3 logic: 2132 * 2133 * what happens when !compr[parm->method].silent? 2134 * Should we create the err pipe, read it, and copy to the log? 2135 */ 2136 2137 if ((apr_procattr_create(&procattr, child_context) != APR_SUCCESS) || 2138 (apr_procattr_io_set(procattr, APR_FULL_BLOCK, 2139 APR_FULL_BLOCK, APR_NO_PIPE) != APR_SUCCESS) || 2140 (apr_procattr_dir_set(procattr, 2141 ap_make_dirstr_parent(r->pool, r->filename)) != APR_SUCCESS) || 2142 (apr_procattr_cmdtype_set(procattr, APR_PROGRAM_PATH) != APR_SUCCESS)) { 2143 /* Something bad happened, tell the world. */ 2144 ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOPROC, r, 2145 "couldn't setup child process: %s", r->filename); 2146 } 2147 else { 2148 new_argv[0] = compr[parm->method].argv[0]; 2149 new_argv[1] = compr[parm->method].argv[1]; 2150 new_argv[2] = r->filename; 2151 new_argv[3] = NULL; 2152 2153 procnew = apr_pcalloc(child_context, sizeof(*procnew)); 2154 rc = apr_proc_create(procnew, compr[parm->method].argv[0], 2155 new_argv, NULL, procattr, child_context); 2156 2157 if (rc != APR_SUCCESS) { 2158 /* Bad things happened. Everyone should have cleaned up. 
*/ 2159 ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOPROC, r, 2160 MODNAME ": could not execute `%s'.", 2161 compr[parm->method].argv[0]); 2162 } 2163 else { 2164 apr_pool_note_subprocess(child_context, procnew, APR_KILL_AFTER_TIMEOUT); 2165 *pipe_in = procnew->out; 2166 } 2167 } 2168 2169 return (rc); 2170} 2171 2172static int uncompress(request_rec *r, int method, 2173 unsigned char **newch, apr_size_t n) 2174{ 2175 struct uncompress_parms parm; 2176 apr_file_t *pipe_out = NULL; 2177 apr_pool_t *sub_context; 2178 apr_status_t rv; 2179 2180 parm.r = r; 2181 parm.method = method; 2182 2183 /* We make a sub_pool so that we can collect our child early, otherwise 2184 * there are cases (i.e. generating directory indicies with mod_autoindex) 2185 * where we would end up with LOTS of zombies. 2186 */ 2187 if (apr_pool_create(&sub_context, r->pool) != APR_SUCCESS) 2188 return -1; 2189 2190 if ((rv = create_uncompress_child(&parm, sub_context, &pipe_out)) != APR_SUCCESS) { 2191 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, 2192 MODNAME ": couldn't spawn uncompress process: %s", r->uri); 2193 return -1; 2194 } 2195 2196 *newch = (unsigned char *) apr_palloc(r->pool, n); 2197 rv = apr_file_read(pipe_out, *newch, &n); 2198 if (n == 0) { 2199 apr_pool_destroy(sub_context); 2200 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, 2201 MODNAME ": read failed from uncompress of %s", r->filename); 2202 return -1; 2203 } 2204 apr_pool_destroy(sub_context); 2205 return n; 2206} 2207 2208/* 2209 * is_tar() -- figure out whether file is a tar archive. 2210 * 2211 * Stolen (by author of file utility) from the public domain tar program: Public 2212 * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu). 2213 * 2214 * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7 2215 * 1997/06/24 00:41:02 ikluft Exp ikluft $ 2216 * 2217 * Comments changed and some code/comments reformatted for file command by Ian 2218 * Darwin. 
2219 */ 2220 2221#define isodigit(c) (((unsigned char)(c) >= '0') && ((unsigned char)(c) <= '7')) 2222 2223/* 2224 * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for 2225 * old UNIX tar file, 2 for Unix Std (POSIX) tar file. 2226 */ 2227 2228static int is_tar(unsigned char *buf, apr_size_t nbytes) 2229{ 2230 register union record *header = (union record *) buf; 2231 register int i; 2232 register long sum, recsum; 2233 register char *p; 2234 2235 if (nbytes < sizeof(union record)) 2236 return 0; 2237 2238 recsum = from_oct(8, header->header.chksum); 2239 2240 sum = 0; 2241 p = header->charptr; 2242 for (i = sizeof(union record); --i >= 0;) { 2243 /* 2244 * We can't use unsigned char here because of old compilers, e.g. V7. 2245 */ 2246 sum += 0xFF & *p++; 2247 } 2248 2249 /* Adjust checksum to count the "chksum" field as blanks. */ 2250 for (i = sizeof(header->header.chksum); --i >= 0;) 2251 sum -= 0xFF & header->header.chksum[i]; 2252 sum += ' ' * sizeof header->header.chksum; 2253 2254 if (sum != recsum) 2255 return 0; /* Not a tar archive */ 2256 2257 if (0 == strcmp(header->header.magic, TMAGIC)) 2258 return 2; /* Unix Standard tar archive */ 2259 2260 return 1; /* Old fashioned tar archive */ 2261} 2262 2263 2264/* 2265 * Quick and dirty octal conversion. 2266 * 2267 * Result is -1 if the field is invalid (all blank, or nonoctal). 
/*
 * Quick and dirty octal conversion of a fixed-width text field.
 *
 *   digs  - width of the field in characters
 *   where - start of the field
 *
 * Leading whitespace is skipped, then octal digits are accumulated until
 * the field width is used up or a non-octal character is met.
 *
 * Returns the converted value, or -1 if the field is all blank or the
 * digits are followed (inside the field) by something other than
 * whitespace or NUL.
 */
static long from_oct(int digs, char *where)
{
    long acc = 0;

    /* skip leading blanks; running out of field here means "all blank" */
    for (; apr_isspace(*where); where++) {
        if (--digs <= 0)
            return -1;
    }

    /* accumulate octal digits, three bits at a time */
    for (; digs > 0 && isodigit(*where); --digs) {
        acc = (acc << 3) | (*where++ - '0');
    }

    /* inside the field, only NUL or whitespace may follow the digits */
    return (digs > 0 && *where != '\0' && !apr_isspace(*where)) ? -1 : acc;
}
2299 */ 2300static int revision_suffix(request_rec *r) 2301{ 2302 int suffix_pos, result; 2303 char *sub_filename; 2304 request_rec *sub; 2305 2306#if MIME_MAGIC_DEBUG 2307 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 2308 MODNAME ": revision_suffix checking %s", r->filename); 2309#endif /* MIME_MAGIC_DEBUG */ 2310 2311 /* check for recognized revision suffix */ 2312 suffix_pos = strlen(r->filename) - 1; 2313 if (!apr_isdigit(r->filename[suffix_pos])) { 2314 return 0; 2315 } 2316 while (suffix_pos >= 0 && apr_isdigit(r->filename[suffix_pos])) 2317 suffix_pos--; 2318 if (suffix_pos < 0 || r->filename[suffix_pos] != '@') { 2319 return 0; 2320 } 2321 2322 /* perform sub-request for the file name without the suffix */ 2323 result = 0; 2324 sub_filename = apr_pstrndup(r->pool, r->filename, suffix_pos); 2325#if MIME_MAGIC_DEBUG 2326 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 2327 MODNAME ": subrequest lookup for %s", sub_filename); 2328#endif /* MIME_MAGIC_DEBUG */ 2329 sub = ap_sub_req_lookup_file(sub_filename, r, NULL); 2330 2331 /* extract content type/encoding/language from sub-request */ 2332 if (sub->content_type) { 2333 ap_set_content_type(r, apr_pstrdup(r->pool, sub->content_type)); 2334#if MIME_MAGIC_DEBUG 2335 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 2336 MODNAME ": subrequest %s got %s", 2337 sub_filename, r->content_type); 2338#endif /* MIME_MAGIC_DEBUG */ 2339 if (sub->content_encoding) 2340 r->content_encoding = 2341 apr_pstrdup(r->pool, sub->content_encoding); 2342 if (sub->content_languages) { 2343 int n; 2344 r->content_languages = apr_array_copy(r->pool, 2345 sub->content_languages); 2346 for (n = 0; n < r->content_languages->nelts; ++n) { 2347 char **lang = ((char **)r->content_languages->elts) + n; 2348 *lang = apr_pstrdup(r->pool, *lang); 2349 } 2350 } 2351 result = 1; 2352 } 2353 2354 /* clean up */ 2355 ap_destroy_sub_req(sub); 2356 2357 return result; 2358} 2359 2360/* 2361 * initialize the module 2362 */ 2363static int magic_init(apr_pool_t 
*p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *main_server) 2364{ 2365 int result; 2366 magic_server_config_rec *conf; 2367 magic_server_config_rec *main_conf; 2368 server_rec *s; 2369#if MIME_MAGIC_DEBUG 2370 struct magic *m, *prevm; 2371#endif /* MIME_MAGIC_DEBUG */ 2372 2373 main_conf = ap_get_module_config(main_server->module_config, &mime_magic_module); 2374 for (s = main_server; s; s = s->next) { 2375 conf = ap_get_module_config(s->module_config, &mime_magic_module); 2376 if (conf->magicfile == NULL && s != main_server) { 2377 /* inherits from the parent */ 2378 *conf = *main_conf; 2379 } 2380 else if (conf->magicfile) { 2381 result = apprentice(s, p); 2382 if (result == -1) 2383 return OK; 2384#if MIME_MAGIC_DEBUG 2385 prevm = 0; 2386 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, 2387 MODNAME ": magic_init 1 test"); 2388 for (m = conf->magic; m; m = m->next) { 2389 if (apr_isprint((((unsigned long) m) >> 24) & 255) && 2390 apr_isprint((((unsigned long) m) >> 16) & 255) && 2391 apr_isprint((((unsigned long) m) >> 8) & 255) && 2392 apr_isprint(((unsigned long) m) & 255)) { 2393 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, 2394 MODNAME ": magic_init 1: POINTER CLOBBERED! " 2395 "m=\"%c%c%c%c\" line=%d", 2396 (((unsigned long) m) >> 24) & 255, 2397 (((unsigned long) m) >> 16) & 255, 2398 (((unsigned long) m) >> 8) & 255, 2399 ((unsigned long) m) & 255, 2400 prevm ? prevm->lineno : -1); 2401 break; 2402 } 2403 prevm = m; 2404 } 2405#endif 2406 } 2407 } 2408 return OK; 2409} 2410 2411/* 2412 * Find the Content-Type from any resource this module has available 2413 */ 2414 2415static int magic_find_ct(request_rec *r) 2416{ 2417 int result; 2418 magic_server_config_rec *conf; 2419 2420 /* the file has to exist */ 2421 if (r->finfo.filetype == 0 || !r->filename) { 2422 return DECLINED; 2423 } 2424 2425 /* was someone else already here? 
*/ 2426 if (r->content_type) { 2427 return DECLINED; 2428 } 2429 2430 conf = ap_get_module_config(r->server->module_config, &mime_magic_module); 2431 if (!conf || !conf->magic) { 2432 return DECLINED; 2433 } 2434 2435 /* initialize per-request info */ 2436 if (!magic_set_config(r)) { 2437 return HTTP_INTERNAL_SERVER_ERROR; 2438 } 2439 2440 /* try excluding file-revision suffixes */ 2441 if (revision_suffix(r) != 1) { 2442 /* process it based on the file contents */ 2443 if ((result = magic_process(r)) != OK) { 2444 return result; 2445 } 2446 } 2447 2448 /* if we have any results, put them in the request structure */ 2449 return magic_rsl_to_request(r); 2450} 2451 2452static void register_hooks(apr_pool_t *p) 2453{ 2454 static const char * const aszPre[]={ "mod_mime.c", NULL }; 2455 2456 /* mod_mime_magic should be run after mod_mime, if at all. */ 2457 2458 ap_hook_type_checker(magic_find_ct, aszPre, NULL, APR_HOOK_MIDDLE); 2459 ap_hook_post_config(magic_init, NULL, NULL, APR_HOOK_FIRST); 2460} 2461 2462/* 2463 * Apache API module interface 2464 */ 2465 2466module AP_MODULE_DECLARE_DATA mime_magic_module = 2467{ 2468 STANDARD20_MODULE_STUFF, 2469 NULL, /* dir config creator */ 2470 NULL, /* dir merger --- default is to override */ 2471 create_magic_server_config, /* server config */ 2472 merge_magic_server_config, /* merge server config */ 2473 mime_magic_cmds, /* command apr_table_t */ 2474 register_hooks /* register hooks */ 2475}; 2476