1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * mod_mime_magic: MIME type lookup via file magic numbers 19 * Copyright (c) 1996-1997 Cisco Systems, Inc. 20 * 21 * This software was submitted by Cisco Systems to the Apache Software Foundation in July 22 * 1997. Future revisions and derivatives of this source code must 23 * acknowledge Cisco Systems as the original contributor of this module. 24 * All other licensing and usage conditions are those of the Apache Software Foundation. 25 * 26 * Some of this code is derived from the free version of the file command 27 * originally posted to comp.sources.unix. Copyright info for that program 28 * is included below as required. 29 * --------------------------------------------------------------------------- 30 * - Copyright (c) Ian F. Darwin, 1987. Written by Ian F. Darwin. 31 * 32 * This software is not subject to any license of the American Telephone and 33 * Telegraph Company or of the Regents of the University of California. 34 * 35 * Permission is granted to anyone to use this software for any purpose on any 36 * computer system, and to alter it and redistribute it freely, subject to 37 * the following restrictions: 38 * 39 * 1. 
The author is not responsible for the consequences of use of this 40 * software, no matter how awful, even if they arise from flaws in it. 41 * 42 * 2. The origin of this software must not be misrepresented, either by 43 * explicit claim or by omission. Since few users ever read sources, credits 44 * must appear in the documentation. 45 * 46 * 3. Altered versions must be plainly marked as such, and must not be 47 * misrepresented as being the original software. Since few users ever read 48 * sources, credits must appear in the documentation. 49 * 50 * 4. This notice may not be removed or altered. 51 * ------------------------------------------------------------------------- 52 * 53 * For compliance with Mr Darwin's terms: this has been very significantly 54 * modified from the free "file" command. 55 * - all-in-one file for compilation convenience when moving from one 56 * version of Apache to the next. 57 * - Memory allocation is done through the Apache API's apr_pool_t structure. 58 * - All functions have had necessary Apache API request or server 59 * structures passed to them where necessary to call other Apache API 60 * routines. (i.e. usually for logging, files, or memory allocation in 61 * itself or a called function.) 62 * - struct magic has been converted from an array to a single-ended linked 63 * list because it only grows one record at a time, it's only accessed 64 * sequentially, and the Apache API has no equivalent of realloc(). 65 * - Functions have been changed to get their parameters from the server 66 * configuration instead of globals. (It should be reentrant now but has 67 * not been tested in a threaded environment.) 68 * - Places where it used to print results to stdout now saves them in a 69 * list where they're used to set the MIME type in the Apache request 70 * record. 71 * - Command-line flags have been removed since they will never be used here. 
 *
 * Ian Kluft <ikluft@cisco.com>
 * Engineering Information Framework
 * Central Engineering
 * Cisco Systems, Inc.
 * San Jose, CA, USA
 *
 * Initial installation          July/August 1996
 * Misc bug fixes                May 1997
 * Submission to Apache Software Foundation    July 1997
 *
 */

#include "apr.h"
#include "apr_strings.h"
#include "apr_lib.h"
#define APR_WANT_STRFUNC
#include "apr_want.h"

#if APR_HAVE_UNISTD_H
#include <unistd.h>
#endif

#include "ap_config.h"
#include "httpd.h"
#include "http_config.h"
#include "http_request.h"
#include "http_core.h"
#include "http_log.h"
#include "http_protocol.h"
#include "util_script.h"

/* ### this isn't set by configure? does anybody set this? */
#ifdef HAVE_UTIME_H
#include <utime.h>
#endif

/*
 * data structures and related constants
 */

#define MODNAME        "mod_mime_magic"
#define MIME_MAGIC_DEBUG        0

#define MIME_BINARY_UNKNOWN    "application/octet-stream"
#define MIME_TEXT_UNKNOWN    "text/plain"

/* size of the stack buffer magic_rsl_printf() formats into */
#define MAXMIMESTRING        256

/* HOWMANY must be at least 4096 to make gzip -dcq work */
#define HOWMANY  4096
/* SMALL_HOWMANY limits how much work we do to figure out text files */
#define SMALL_HOWMANY 1024
#define MAXDESC    50           /* max length of text description */
#define MAXstring 64            /* max length of "string" types */

/*
 * One parsed rule (line) of the magic file.  parse() allocates one of
 * these per non-comment line and appends it to the per-server list.
 */
struct magic {
    struct magic *next;     /* link to next entry */
    int lineno;             /* line number from magic file */

    short flag;
#define INDIR    1          /* if '>(...)' appears,  */
#define UNSIGNED 2          /* comparison is unsigned */
    short cont_level;       /* level of ">" (number of leading '>' chars) */
    struct {
        char type;          /* byte short long */
        long offset;        /* offset from indirection */
    } in;
    long offset;            /* offset to magic number */
    unsigned char reln;     /* relation (0=eq, '>'=gt, etc) */
    char type;              /* int, short, long or string. */
    char vallen;            /* length of string value, if any */
#define BYTE    1
#define SHORT    2
#define LONG    4
#define STRING    5
#define DATE    6
#define BESHORT    7
#define BELONG    8
#define BEDATE    9
#define LESHORT    10
#define LELONG    11
#define LEDATE    12
    union VALUETYPE {
        unsigned char b;
        unsigned short h;
        unsigned long l;
        char s[MAXstring];
        unsigned char hs[2];   /* 2 bytes of a fixed-endian "short" */
        unsigned char hl[4];   /* 4 bytes of a fixed-endian "long" */
    } value;                /* either number or string */
    unsigned long mask;     /* mask before comparison with value */
    char nospflag;          /* suppress space character */

    /* NOTE: this string is suspected of overrunning - find it! */
    /* (parse() fills it with apr_cpystrn() bounded by sizeof(desc)) */
    char desc[MAXDESC];     /* description */
};

/*
 * data structures for tar file recognition
 * --------------------------------------------------------------------------
 * Header file for public domain tar (tape archive) program.
 *
 * @(#)tar.h 1.20 86/10/29    Public Domain. Created 25 August 1985 by John
 * Gilmore, ihnp4!hoptoad!gnu.
 *
 * Header block on tape.
 *
 * I'm going to use traditional DP naming conventions here. A "block" is a big
 * chunk of stuff that we do I/O on. A "record" is a piece of info that we
 * care about. Typically many "record"s fit into a "block".
 */
#define RECORDSIZE    512
#define NAMSIZ    100
#define TUNMLEN    32
#define TGNMLEN    32

/* One tar header record, viewable either as raw bytes or as named fields. */
union record {
    char charptr[RECORDSIZE];
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;
};

/* The magic field is filled with this if uname and gname are valid.
 */
/*
 * NOTE(review): the comment below promises 7 characters plus the NUL, but
 * the literal as it appears here has only 6.  Whitespace inside the literal
 * may have been collapsed in transit -- verify against the upstream file
 * before relying on it.
 */
#define TMAGIC    "ustar "        /* 7 chars and a null */

/*
 * file-function prototypes
 */

/* content-sniffing passes; tryit() calls zmagic, softmagic and ascmagic */
static int ascmagic(request_rec *, unsigned char *, apr_size_t);
static int is_tar(unsigned char *, apr_size_t);
static int softmagic(request_rec *, unsigned char *, apr_size_t);
static int tryit(request_rec *, unsigned char *, apr_size_t, int);
static int zmagic(request_rec *, unsigned char *, apr_size_t);

/* magic-file parsing: parse() calls getvalue(), which calls getstr() */
static int getvalue(server_rec *, struct magic *, char **);
static int hextoint(int);
static char *getstr(server_rec *, char *, char *, int, int *);
static int parse(server_rec *, apr_pool_t *p, char *, int);

/* rule evaluation helpers (defined further down the file) */
static int match(request_rec *, unsigned char *, apr_size_t);
static int mget(request_rec *, union VALUETYPE *, unsigned char *,
                struct magic *, apr_size_t);
static int mcheck(request_rec *, union VALUETYPE *, struct magic *);
static void mprint(request_rec *, union VALUETYPE *, struct magic *);

static int uncompress(request_rec *, int,
                      unsigned char **, apr_size_t);
static long from_oct(int, char *);
/* filesystem-status check; magic_process() calls it before reading data */
static int fsmagic(request_rec *r, const char *fn);

/*
 * includes for ASCII substring recognition formerly "names.h" in file
 * command
 *
 * Original notes: names and types used by ascmagic in file(1). These tokens are
 * here because they can appear anywhere in the first HOWMANY bytes, while
 * tokens in /etc/magic must appear at fixed offsets into the file. Don't
 * make HOWMANY too high unless you have a very fast CPU.
 */

/* these types are used to index the apr_table_t 'types': keep em in sync!
*/ 248/* HTML inserted in first because this is a web server module now */ 249#define L_HTML 0 /* HTML */ 250#define L_C 1 /* first and foremost on UNIX */ 251#define L_FORT 2 /* the oldest one */ 252#define L_MAKE 3 /* Makefiles */ 253#define L_PLI 4 /* PL/1 */ 254#define L_MACH 5 /* some kinda assembler */ 255#define L_ENG 6 /* English */ 256#define L_PAS 7 /* Pascal */ 257#define L_MAIL 8 /* Electronic mail */ 258#define L_NEWS 9 /* Usenet Netnews */ 259 260static const char *types[] = 261{ 262 "text/html", /* HTML */ 263 "text/plain", /* "c program text", */ 264 "text/plain", /* "fortran program text", */ 265 "text/plain", /* "make commands text", */ 266 "text/plain", /* "pl/1 program text", */ 267 "text/plain", /* "assembler program text", */ 268 "text/plain", /* "English text", */ 269 "text/plain", /* "pascal program text", */ 270 "message/rfc822", /* "mail text", */ 271 "message/news", /* "news text", */ 272 "application/binary", /* "can't happen error on names.h/types", */ 273 0 274}; 275 276static const struct names { 277 const char *name; 278 short type; 279} names[] = { 280 281 /* These must be sorted by eye for optimal hit rate */ 282 /* Add to this list only after substantial meditation */ 283 { 284 "<html>", L_HTML 285 }, 286 { 287 "<HTML>", L_HTML 288 }, 289 { 290 "<head>", L_HTML 291 }, 292 { 293 "<HEAD>", L_HTML 294 }, 295 { 296 "<title>", L_HTML 297 }, 298 { 299 "<TITLE>", L_HTML 300 }, 301 { 302 "<h1>", L_HTML 303 }, 304 { 305 "<H1>", L_HTML 306 }, 307 { 308 "<!--", L_HTML 309 }, 310 { 311 "<!DOCTYPE HTML", L_HTML 312 }, 313 { 314 "/*", L_C 315 }, /* must precede "The", "the", etc. 
*/ 316 { 317 "#include", L_C 318 }, 319 { 320 "char", L_C 321 }, 322 { 323 "The", L_ENG 324 }, 325 { 326 "the", L_ENG 327 }, 328 { 329 "double", L_C 330 }, 331 { 332 "extern", L_C 333 }, 334 { 335 "float", L_C 336 }, 337 { 338 "real", L_C 339 }, 340 { 341 "struct", L_C 342 }, 343 { 344 "union", L_C 345 }, 346 { 347 "CFLAGS", L_MAKE 348 }, 349 { 350 "LDFLAGS", L_MAKE 351 }, 352 { 353 "all:", L_MAKE 354 }, 355 { 356 ".PRECIOUS", L_MAKE 357 }, 358 /* 359 * Too many files of text have these words in them. Find another way to 360 * recognize Fortrash. 361 */ 362#ifdef NOTDEF 363 { 364 "subroutine", L_FORT 365 }, 366 { 367 "function", L_FORT 368 }, 369 { 370 "block", L_FORT 371 }, 372 { 373 "common", L_FORT 374 }, 375 { 376 "dimension", L_FORT 377 }, 378 { 379 "integer", L_FORT 380 }, 381 { 382 "data", L_FORT 383 }, 384#endif /* NOTDEF */ 385 { 386 ".ascii", L_MACH 387 }, 388 { 389 ".asciiz", L_MACH 390 }, 391 { 392 ".byte", L_MACH 393 }, 394 { 395 ".even", L_MACH 396 }, 397 { 398 ".globl", L_MACH 399 }, 400 { 401 "clr", L_MACH 402 }, 403 { 404 "(input,", L_PAS 405 }, 406 { 407 "dcl", L_PLI 408 }, 409 { 410 "Received:", L_MAIL 411 }, 412 { 413 ">From", L_MAIL 414 }, 415 { 416 "Return-Path:", L_MAIL 417 }, 418 { 419 "Cc:", L_MAIL 420 }, 421 { 422 "Newsgroups:", L_NEWS 423 }, 424 { 425 "Path:", L_NEWS 426 }, 427 { 428 "Organization:", L_NEWS 429 }, 430 { 431 NULL, 0 432 } 433}; 434 435#define NNAMES ((sizeof(names)/sizeof(struct names)) - 1) 436 437/* 438 * Result String List (RSL) 439 * 440 * The file(1) command prints its output. Instead, we store the various 441 * "printed" strings in a list (allocating memory as we go) and concatenate 442 * them at the end when we finally know how much space they'll need. 
443 */ 444 445typedef struct magic_rsl_s { 446 const char *str; /* string, possibly a fragment */ 447 struct magic_rsl_s *next; /* pointer to next fragment */ 448} magic_rsl; 449 450/* 451 * Apache module configuration structures 452 */ 453 454/* per-server info */ 455typedef struct { 456 const char *magicfile; /* where magic be found */ 457 struct magic *magic; /* head of magic config list */ 458 struct magic *last; 459} magic_server_config_rec; 460 461/* per-request info */ 462typedef struct { 463 magic_rsl *head; /* result string list */ 464 magic_rsl *tail; 465 unsigned suf_recursion; /* recursion depth in suffix check */ 466} magic_req_rec; 467 468/* 469 * configuration functions - called by Apache API routines 470 */ 471 472module AP_MODULE_DECLARE_DATA mime_magic_module; 473 474static void *create_magic_server_config(apr_pool_t *p, server_rec *d) 475{ 476 /* allocate the config - use pcalloc because it needs to be zeroed */ 477 return apr_pcalloc(p, sizeof(magic_server_config_rec)); 478} 479 480static void *merge_magic_server_config(apr_pool_t *p, void *basev, void *addv) 481{ 482 magic_server_config_rec *base = (magic_server_config_rec *) basev; 483 magic_server_config_rec *add = (magic_server_config_rec *) addv; 484 magic_server_config_rec *new = (magic_server_config_rec *) 485 apr_palloc(p, sizeof(magic_server_config_rec)); 486 487 new->magicfile = add->magicfile ? 
add->magicfile : base->magicfile; 488 new->magic = NULL; 489 new->last = NULL; 490 return new; 491} 492 493static const char *set_magicfile(cmd_parms *cmd, void *dummy, const char *arg) 494{ 495 magic_server_config_rec *conf = (magic_server_config_rec *) 496 ap_get_module_config(cmd->server->module_config, 497 &mime_magic_module); 498 499 if (!conf) { 500 return MODNAME ": server structure not allocated"; 501 } 502 conf->magicfile = arg; 503 return NULL; 504} 505 506/* 507 * configuration file commands - exported to Apache API 508 */ 509 510static const command_rec mime_magic_cmds[] = 511{ 512 AP_INIT_TAKE1("MimeMagicFile", set_magicfile, NULL, RSRC_CONF, 513 "Path to MIME Magic file (in file(1) format)"), 514 {NULL} 515}; 516 517/* 518 * RSL (result string list) processing routines 519 * 520 * These collect strings that would have been printed in fragments by file(1) 521 * into a list of magic_rsl structures with the strings. When complete, 522 * they're concatenated together to become the MIME content and encoding 523 * types. 
524 * 525 * return value conventions for these functions: functions which return int: 526 * failure = -1, other = result functions which return pointers: failure = 0, 527 * other = result 528 */ 529 530/* allocate a per-request structure and put it in the request record */ 531static magic_req_rec *magic_set_config(request_rec *r) 532{ 533 magic_req_rec *req_dat = (magic_req_rec *) apr_palloc(r->pool, 534 sizeof(magic_req_rec)); 535 536 req_dat->head = req_dat->tail = (magic_rsl *) NULL; 537 ap_set_module_config(r->request_config, &mime_magic_module, req_dat); 538 return req_dat; 539} 540 541/* add a string to the result string list for this request */ 542/* it is the responsibility of the caller to allocate "str" */ 543static int magic_rsl_add(request_rec *r, const char *str) 544{ 545 magic_req_rec *req_dat = (magic_req_rec *) 546 ap_get_module_config(r->request_config, &mime_magic_module); 547 magic_rsl *rsl; 548 549 /* make sure we have a list to put it in */ 550 if (!req_dat) { 551 ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_EINVAL, r, APLOGNO(01507) 552 MODNAME ": request config should not be NULL"); 553 if (!(req_dat = magic_set_config(r))) { 554 /* failure */ 555 return -1; 556 } 557 } 558 559 /* allocate the list entry */ 560 rsl = (magic_rsl *) apr_palloc(r->pool, sizeof(magic_rsl)); 561 562 /* fill it */ 563 rsl->str = str; 564 rsl->next = (magic_rsl *) NULL; 565 566 /* append to the list */ 567 if (req_dat->head && req_dat->tail) { 568 req_dat->tail->next = rsl; 569 req_dat->tail = rsl; 570 } 571 else { 572 req_dat->head = req_dat->tail = rsl; 573 } 574 575 /* success */ 576 return 0; 577} 578 579/* RSL hook for puts-type functions */ 580static int magic_rsl_puts(request_rec *r, const char *str) 581{ 582 return magic_rsl_add(r, str); 583} 584 585/* RSL hook for printf-type functions */ 586static int magic_rsl_printf(request_rec *r, char *str,...) 
587{ 588 va_list ap; 589 590 char buf[MAXMIMESTRING]; 591 592 /* assemble the string into the buffer */ 593 va_start(ap, str); 594 apr_vsnprintf(buf, sizeof(buf), str, ap); 595 va_end(ap); 596 597 /* add the buffer to the list */ 598 return magic_rsl_add(r, apr_pstrdup(r->pool, buf)); 599} 600 601/* RSL hook for putchar-type functions */ 602static int magic_rsl_putchar(request_rec *r, char c) 603{ 604 char str[2]; 605 606 /* high overhead for 1 char - just hope they don't do this much */ 607 str[0] = c; 608 str[1] = '\0'; 609 return magic_rsl_add(r, str); 610} 611 612/* allocate and copy a contiguous string from a result string list */ 613static char *rsl_strdup(request_rec *r, int start_frag, int start_pos, int len) 614{ 615 char *result; /* return value */ 616 int cur_frag, /* current fragment number/counter */ 617 cur_pos, /* current position within fragment */ 618 res_pos; /* position in result string */ 619 magic_rsl *frag; /* list-traversal pointer */ 620 magic_req_rec *req_dat = (magic_req_rec *) 621 ap_get_module_config(r->request_config, &mime_magic_module); 622 623 /* allocate the result string */ 624 result = (char *) apr_palloc(r->pool, len + 1); 625 626 /* loop through and collect the string */ 627 res_pos = 0; 628 for (frag = req_dat->head, cur_frag = 0; 629 frag->next; 630 frag = frag->next, cur_frag++) { 631 /* loop to the first fragment */ 632 if (cur_frag < start_frag) 633 continue; 634 635 /* loop through and collect chars */ 636 for (cur_pos = (cur_frag == start_frag) ? 
start_pos : 0; 637 frag->str[cur_pos]; 638 cur_pos++) { 639 if (cur_frag >= start_frag 640 && cur_pos >= start_pos 641 && res_pos <= len) { 642 result[res_pos++] = frag->str[cur_pos]; 643 if (res_pos > len) { 644 break; 645 } 646 } 647 } 648 } 649 650 /* clean up and return */ 651 result[res_pos] = 0; 652#if MIME_MAGIC_DEBUG 653 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01508) 654 MODNAME ": rsl_strdup() %d chars: %s", res_pos - 1, result); 655#endif 656 return result; 657} 658 659/* states for the state-machine algorithm in magic_rsl_to_request() */ 660typedef enum { 661 rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding 662} rsl_states; 663 664/* process the RSL and set the MIME info in the request record */ 665static int magic_rsl_to_request(request_rec *r) 666{ 667 int cur_frag, /* current fragment number/counter */ 668 cur_pos, /* current position within fragment */ 669 type_frag, /* content type starting point: fragment */ 670 type_pos, /* content type starting point: position */ 671 type_len, /* content type length */ 672 encoding_frag, /* content encoding starting point: fragment */ 673 encoding_pos, /* content encoding starting point: position */ 674 encoding_len; /* content encoding length */ 675 676 char *tmp; 677 magic_rsl *frag; /* list-traversal pointer */ 678 rsl_states state; 679 680 magic_req_rec *req_dat = (magic_req_rec *) 681 ap_get_module_config(r->request_config, &mime_magic_module); 682 683 /* check if we have a result */ 684 if (!req_dat || !req_dat->head) { 685 /* empty - no match, we defer to other Apache modules */ 686 return DECLINED; 687 } 688 689 /* start searching for the type and encoding */ 690 state = rsl_leading_space; 691 type_frag = type_pos = type_len = 0; 692 encoding_frag = encoding_pos = encoding_len = 0; 693 for (frag = req_dat->head, cur_frag = 0; 694 frag && frag->next; 695 frag = frag->next, cur_frag++) { 696 /* loop through the characters in the fragment */ 697 for (cur_pos = 0; 
frag->str[cur_pos]; cur_pos++) { 698 if (apr_isspace(frag->str[cur_pos])) { 699 /* process whitespace actions for each state */ 700 if (state == rsl_leading_space) { 701 /* eat whitespace in this state */ 702 continue; 703 } 704 else if (state == rsl_type) { 705 /* whitespace: type has no slash! */ 706 return DECLINED; 707 } 708 else if (state == rsl_subtype) { 709 /* whitespace: end of MIME type */ 710 state++; 711 continue; 712 } 713 else if (state == rsl_separator) { 714 /* eat whitespace in this state */ 715 continue; 716 } 717 else if (state == rsl_encoding) { 718 /* whitespace: end of MIME encoding */ 719 /* we're done */ 720 frag = req_dat->tail; 721 break; 722 } 723 else { 724 /* should not be possible */ 725 /* abandon malfunctioning module */ 726 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01509) 727 MODNAME ": bad state %d (ws)", state); 728 return DECLINED; 729 } 730 /* NOTREACHED */ 731 } 732 else if (state == rsl_type && 733 frag->str[cur_pos] == '/') { 734 /* copy the char and go to rsl_subtype state */ 735 type_len++; 736 state++; 737 } 738 else { 739 /* process non-space actions for each state */ 740 if (state == rsl_leading_space) { 741 /* non-space: begin MIME type */ 742 state++; 743 type_frag = cur_frag; 744 type_pos = cur_pos; 745 type_len = 1; 746 continue; 747 } 748 else if (state == rsl_type || 749 state == rsl_subtype) { 750 /* non-space: adds to type */ 751 type_len++; 752 continue; 753 } 754 else if (state == rsl_separator) { 755 /* non-space: begin MIME encoding */ 756 state++; 757 encoding_frag = cur_frag; 758 encoding_pos = cur_pos; 759 encoding_len = 1; 760 continue; 761 } 762 else if (state == rsl_encoding) { 763 /* non-space: adds to encoding */ 764 encoding_len++; 765 continue; 766 } 767 else { 768 /* should not be possible */ 769 /* abandon malfunctioning module */ 770 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01510) 771 MODNAME ": bad state %d (ns)", state); 772 return DECLINED; 773 } 774 /* NOTREACHED */ 775 } 
776 /* NOTREACHED */ 777 } 778 } 779 780 /* if we ended prior to state rsl_subtype, we had incomplete info */ 781 if (state != rsl_subtype && state != rsl_separator && 782 state != rsl_encoding) { 783 /* defer to other modules */ 784 return DECLINED; 785 } 786 787 /* save the info in the request record */ 788 tmp = rsl_strdup(r, type_frag, type_pos, type_len); 789 /* XXX: this could be done at config time I'm sure... but I'm 790 * confused by all this magic_rsl stuff. -djg */ 791 ap_content_type_tolower(tmp); 792 ap_set_content_type(r, tmp); 793 794 if (state == rsl_encoding) { 795 tmp = rsl_strdup(r, encoding_frag, 796 encoding_pos, encoding_len); 797 /* XXX: this could be done at config time I'm sure... but I'm 798 * confused by all this magic_rsl stuff. -djg */ 799 ap_str_tolower(tmp); 800 r->content_encoding = tmp; 801 } 802 803 /* detect memory allocation or other errors */ 804 if (!r->content_type || 805 (state == rsl_encoding && !r->content_encoding)) { 806 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01511) 807 MODNAME ": unexpected state %d; could be caused by bad " 808 "data in magic file", 809 state); 810 return HTTP_INTERNAL_SERVER_ERROR; 811 } 812 813 /* success! */ 814 return OK; 815} 816 817/* 818 * magic_process - process input file r Apache API request record 819 * (formerly called "process" in file command, prefix added for clarity) Opens 820 * the file and reads a fixed-size buffer to begin processing the contents. 
821 */ 822static int magic_process(request_rec *r) 823{ 824 apr_file_t *fd = NULL; 825 unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */ 826 apr_size_t nbytes = 0; /* number of bytes read from a datafile */ 827 int result; 828 829 /* 830 * first try judging the file based on its filesystem status 831 */ 832 switch ((result = fsmagic(r, r->filename))) { 833 case DONE: 834 magic_rsl_putchar(r, '\n'); 835 return OK; 836 case OK: 837 break; 838 default: 839 /* fatal error, bail out */ 840 return result; 841 } 842 843 if (apr_file_open(&fd, r->filename, APR_READ, APR_OS_DEFAULT, r->pool) != APR_SUCCESS) { 844 /* We can't open it, but we were able to stat it. */ 845 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01512) 846 MODNAME ": can't read `%s'", r->filename); 847 /* let some other handler decide what the problem is */ 848 return DECLINED; 849 } 850 851 /* 852 * try looking at the first HOWMANY bytes 853 */ 854 nbytes = sizeof(buf) - 1; 855 if ((result = apr_file_read(fd, (char *) buf, &nbytes)) != APR_SUCCESS) { 856 ap_log_rerror(APLOG_MARK, APLOG_ERR, result, r, APLOGNO(01513) 857 MODNAME ": read failed: %s", r->filename); 858 return HTTP_INTERNAL_SERVER_ERROR; 859 } 860 861 if (nbytes == 0) { 862 return DECLINED; 863 } 864 else { 865 buf[nbytes++] = '\0'; /* null-terminate it */ 866 result = tryit(r, buf, nbytes, 1); 867 if (result != OK) { 868 return result; 869 } 870 } 871 872 (void) apr_file_close(fd); 873 (void) magic_rsl_putchar(r, '\n'); 874 875 return OK; 876} 877 878 879static int tryit(request_rec *r, unsigned char *buf, apr_size_t nb, 880 int checkzmagic) 881{ 882 /* 883 * Try compression stuff 884 */ 885 if (checkzmagic == 1) { 886 if (zmagic(r, buf, nb) == 1) 887 return OK; 888 } 889 890 /* 891 * try tests in /etc/magic (or surrogate magic file) 892 */ 893 if (softmagic(r, buf, nb) == 1) 894 return OK; 895 896 /* 897 * try known keywords, check for ascii-ness too. 
898 */ 899 if (ascmagic(r, buf, nb) == 1) 900 return OK; 901 902 /* 903 * abandon hope, all ye who remain here 904 */ 905 return DECLINED; 906} 907 908#define EATAB {while (apr_isspace(*l)) ++l;} 909 910/* 911 * apprentice - load configuration from the magic file r 912 * API request record 913 */ 914static int apprentice(server_rec *s, apr_pool_t *p) 915{ 916 apr_file_t *f = NULL; 917 apr_status_t result; 918 char line[BUFSIZ + 1]; 919 int errs = 0; 920 int lineno; 921#if MIME_MAGIC_DEBUG 922 int rule = 0; 923 struct magic *m, *prevm; 924#endif 925 magic_server_config_rec *conf = (magic_server_config_rec *) 926 ap_get_module_config(s->module_config, &mime_magic_module); 927 const char *fname = ap_server_root_relative(p, conf->magicfile); 928 929 if (!fname) { 930 ap_log_error(APLOG_MARK, APLOG_ERR, APR_EBADPATH, s, APLOGNO(01514) 931 MODNAME ": Invalid magic file path %s", conf->magicfile); 932 return -1; 933 } 934 if ((result = apr_file_open(&f, fname, APR_READ | APR_BUFFERED, 935 APR_OS_DEFAULT, p)) != APR_SUCCESS) { 936 ap_log_error(APLOG_MARK, APLOG_ERR, result, s, APLOGNO(01515) 937 MODNAME ": can't read magic file %s", fname); 938 return -1; 939 } 940 941 /* set up the magic list (empty) */ 942 conf->magic = conf->last = NULL; 943 944 /* parse it */ 945 for (lineno = 1; apr_file_gets(line, BUFSIZ, f) == APR_SUCCESS; lineno++) { 946 int ws_offset; 947 char *last = line + strlen(line) - 1; /* guaranteed that len >= 1 since an 948 * "empty" line contains a '\n' 949 */ 950 951 /* delete newline and any other trailing whitespace */ 952 while (last >= line 953 && apr_isspace(*last)) { 954 *last = '\0'; 955 --last; 956 } 957 958 /* skip leading whitespace */ 959 ws_offset = 0; 960 while (line[ws_offset] && apr_isspace(line[ws_offset])) { 961 ws_offset++; 962 } 963 964 /* skip blank lines */ 965 if (line[ws_offset] == 0) { 966 continue; 967 } 968 969 /* comment, do not parse */ 970 if (line[ws_offset] == '#') 971 continue; 972 973#if MIME_MAGIC_DEBUG 974 /* if we get 
here, we're going to use it so count it */ 975 rule++; 976#endif 977 978 /* parse it */ 979 if (parse(s, p, line + ws_offset, lineno) != 0) 980 ++errs; 981 } 982 983 (void) apr_file_close(f); 984 985#if MIME_MAGIC_DEBUG 986 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01516) 987 MODNAME ": apprentice conf=%x file=%s m=%s m->next=%s last=%s", 988 conf, 989 conf->magicfile ? conf->magicfile : "NULL", 990 conf->magic ? "set" : "NULL", 991 (conf->magic && conf->magic->next) ? "set" : "NULL", 992 conf->last ? "set" : "NULL"); 993 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01517) 994 MODNAME ": apprentice read %d lines, %d rules, %d errors", 995 lineno, rule, errs); 996#endif 997 998#if MIME_MAGIC_DEBUG 999 prevm = 0; 1000 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01518) 1001 MODNAME ": apprentice test"); 1002 for (m = conf->magic; m; m = m->next) { 1003 if (apr_isprint((((unsigned long) m) >> 24) & 255) && 1004 apr_isprint((((unsigned long) m) >> 16) & 255) && 1005 apr_isprint((((unsigned long) m) >> 8) & 255) && 1006 apr_isprint(((unsigned long) m) & 255)) { 1007 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01519) 1008 MODNAME ": apprentice: POINTER CLOBBERED! " 1009 "m=\"%c%c%c%c\" line=%d", 1010 (((unsigned long) m) >> 24) & 255, 1011 (((unsigned long) m) >> 16) & 255, 1012 (((unsigned long) m) >> 8) & 255, 1013 ((unsigned long) m) & 255, 1014 prevm ? prevm->lineno : -1); 1015 break; 1016 } 1017 prevm = m; 1018 } 1019#endif 1020 1021 return (errs ? -1 : 0); 1022} 1023 1024/* 1025 * extend the sign bit if the comparison is to be signed 1026 */ 1027static unsigned long signextend(server_rec *s, struct magic *m, unsigned long v) 1028{ 1029 if (!(m->flag & UNSIGNED)) 1030 switch (m->type) { 1031 /* 1032 * Do not remove the casts below. They are vital. When later 1033 * compared with the data, the sign extension must have happened. 
1034 */ 1035 case BYTE: 1036 v = (char) v; 1037 break; 1038 case SHORT: 1039 case BESHORT: 1040 case LESHORT: 1041 v = (short) v; 1042 break; 1043 case DATE: 1044 case BEDATE: 1045 case LEDATE: 1046 case LONG: 1047 case BELONG: 1048 case LELONG: 1049 v = (long) v; 1050 break; 1051 case STRING: 1052 break; 1053 default: 1054 ap_log_error(APLOG_MARK, APLOG_ERR, 0, s, APLOGNO(01520) 1055 MODNAME ": can't happen: m->type=%d", m->type); 1056 return -1; 1057 } 1058 return v; 1059} 1060 1061/* 1062 * parse one line from magic file, put into magic[index++] if valid 1063 */ 1064static int parse(server_rec *serv, apr_pool_t *p, char *l, int lineno) 1065{ 1066 struct magic *m; 1067 char *t, *s; 1068 magic_server_config_rec *conf = (magic_server_config_rec *) 1069 ap_get_module_config(serv->module_config, &mime_magic_module); 1070 1071 /* allocate magic structure entry */ 1072 m = (struct magic *) apr_pcalloc(p, sizeof(struct magic)); 1073 1074 /* append to linked list */ 1075 m->next = NULL; 1076 if (!conf->magic || !conf->last) { 1077 conf->magic = conf->last = m; 1078 } 1079 else { 1080 conf->last->next = m; 1081 conf->last = m; 1082 } 1083 1084 /* set values in magic structure */ 1085 m->flag = 0; 1086 m->cont_level = 0; 1087 m->lineno = lineno; 1088 1089 while (*l == '>') { 1090 ++l; /* step over */ 1091 m->cont_level++; 1092 } 1093 1094 if (m->cont_level != 0 && *l == '(') { 1095 ++l; /* step over */ 1096 m->flag |= INDIR; 1097 } 1098 1099 /* get offset, then skip over it */ 1100 m->offset = (int) strtol(l, &t, 0); 1101 if (l == t) { 1102 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01521) 1103 MODNAME ": offset %s invalid", l); 1104 } 1105 l = t; 1106 1107 if (m->flag & INDIR) { 1108 m->in.type = LONG; 1109 m->in.offset = 0; 1110 /* 1111 * read [.lbs][+-]nnnnn) 1112 */ 1113 if (*l == '.') { 1114 switch (*++l) { 1115 case 'l': 1116 m->in.type = LONG; 1117 break; 1118 case 's': 1119 m->in.type = SHORT; 1120 break; 1121 case 'b': 1122 m->in.type = BYTE; 1123 break; 
1124 default: 1125 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01522) 1126 MODNAME ": indirect offset type %c invalid", *l); 1127 break; 1128 } 1129 l++; 1130 } 1131 s = l; 1132 if (*l == '+' || *l == '-') 1133 l++; 1134 if (apr_isdigit((unsigned char) *l)) { 1135 m->in.offset = strtol(l, &t, 0); 1136 if (*s == '-') 1137 m->in.offset = -m->in.offset; 1138 } 1139 else 1140 t = l; 1141 if (*t++ != ')') { 1142 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01523) 1143 MODNAME ": missing ')' in indirect offset"); 1144 } 1145 l = t; 1146 } 1147 1148 1149 while (apr_isdigit((unsigned char) *l)) 1150 ++l; 1151 EATAB; 1152 1153#define NBYTE 4 1154#define NSHORT 5 1155#define NLONG 4 1156#define NSTRING 6 1157#define NDATE 4 1158#define NBESHORT 7 1159#define NBELONG 6 1160#define NBEDATE 6 1161#define NLESHORT 7 1162#define NLELONG 6 1163#define NLEDATE 6 1164 1165 if (*l == 'u') { 1166 ++l; 1167 m->flag |= UNSIGNED; 1168 } 1169 1170 /* get type, skip it */ 1171 if (strncmp(l, "byte", NBYTE) == 0) { 1172 m->type = BYTE; 1173 l += NBYTE; 1174 } 1175 else if (strncmp(l, "short", NSHORT) == 0) { 1176 m->type = SHORT; 1177 l += NSHORT; 1178 } 1179 else if (strncmp(l, "long", NLONG) == 0) { 1180 m->type = LONG; 1181 l += NLONG; 1182 } 1183 else if (strncmp(l, "string", NSTRING) == 0) { 1184 m->type = STRING; 1185 l += NSTRING; 1186 } 1187 else if (strncmp(l, "date", NDATE) == 0) { 1188 m->type = DATE; 1189 l += NDATE; 1190 } 1191 else if (strncmp(l, "beshort", NBESHORT) == 0) { 1192 m->type = BESHORT; 1193 l += NBESHORT; 1194 } 1195 else if (strncmp(l, "belong", NBELONG) == 0) { 1196 m->type = BELONG; 1197 l += NBELONG; 1198 } 1199 else if (strncmp(l, "bedate", NBEDATE) == 0) { 1200 m->type = BEDATE; 1201 l += NBEDATE; 1202 } 1203 else if (strncmp(l, "leshort", NLESHORT) == 0) { 1204 m->type = LESHORT; 1205 l += NLESHORT; 1206 } 1207 else if (strncmp(l, "lelong", NLELONG) == 0) { 1208 m->type = LELONG; 1209 l += NLELONG; 1210 } 1211 else if (strncmp(l, "ledate", 
NLEDATE) == 0) { 1212 m->type = LEDATE; 1213 l += NLEDATE; 1214 } 1215 else { 1216 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01524) 1217 MODNAME ": type %s invalid", l); 1218 return -1; 1219 } 1220 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 1221 if (*l == '&') { 1222 ++l; 1223 m->mask = signextend(serv, m, strtol(l, &l, 0)); 1224 } 1225 else 1226 m->mask = ~0L; 1227 EATAB; 1228 1229 switch (*l) { 1230 case '>': 1231 case '<': 1232 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 1233 case '&': 1234 case '^': 1235 case '=': 1236 m->reln = *l; 1237 ++l; 1238 break; 1239 case '!': 1240 if (m->type != STRING) { 1241 m->reln = *l; 1242 ++l; 1243 break; 1244 } 1245 /* FALL THROUGH */ 1246 default: 1247 if (*l == 'x' && apr_isspace(l[1])) { 1248 m->reln = *l; 1249 ++l; 1250 goto GetDesc; /* Bill The Cat */ 1251 } 1252 m->reln = '='; 1253 break; 1254 } 1255 EATAB; 1256 1257 if (getvalue(serv, m, &l)) 1258 return -1; 1259 /* 1260 * now get last part - the description 1261 */ 1262 GetDesc: 1263 EATAB; 1264 if (l[0] == '\b') { 1265 ++l; 1266 m->nospflag = 1; 1267 } 1268 else if ((l[0] == '\\') && (l[1] == 'b')) { 1269 ++l; 1270 ++l; 1271 m->nospflag = 1; 1272 } 1273 else 1274 m->nospflag = 0; 1275 apr_cpystrn(m->desc, l, sizeof(m->desc)); 1276 1277#if MIME_MAGIC_DEBUG 1278 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, serv, APLOGNO(01525) 1279 MODNAME ": parse line=%d m=%x next=%x cont=%d desc=%s", 1280 lineno, m, m->next, m->cont_level, m->desc); 1281#endif /* MIME_MAGIC_DEBUG */ 1282 1283 return 0; 1284} 1285 1286/* 1287 * Read a numeric value from a pointer, into the value union of a magic 1288 * pointer, according to the magic type. Update the string pointer to point 1289 * just after the number read. Return 0 for success, non-zero for failure. 
1290 */ 1291static int getvalue(server_rec *s, struct magic *m, char **p) 1292{ 1293 int slen; 1294 1295 if (m->type == STRING) { 1296 *p = getstr(s, *p, m->value.s, sizeof(m->value.s), &slen); 1297 m->vallen = slen; 1298 } 1299 else if (m->reln != 'x') 1300 m->value.l = signextend(s, m, strtol(*p, p, 0)); 1301 return 0; 1302} 1303 1304/* 1305 * Convert a string containing C character escapes. Stop at an unescaped 1306 * space or tab. Copy the converted version to "p", returning its length in 1307 * *slen. Return updated scan pointer as function result. 1308 */ 1309static char *getstr(server_rec *serv, register char *s, register char *p, 1310 int plen, int *slen) 1311{ 1312 char *origs = s, *origp = p; 1313 char *pmax = p + plen - 1; 1314 register int c; 1315 register int val; 1316 1317 while ((c = *s++) != '\0') { 1318 if (apr_isspace(c)) 1319 break; 1320 if (p >= pmax) { 1321 ap_log_error(APLOG_MARK, APLOG_ERR, 0, serv, APLOGNO(01526) 1322 MODNAME ": string too long: %s", origs); 1323 break; 1324 } 1325 if (c == '\\') { 1326 switch (c = *s++) { 1327 1328 case '\0': 1329 goto out; 1330 1331 default: 1332 *p++ = (char) c; 1333 break; 1334 1335 case 'n': 1336 *p++ = '\n'; 1337 break; 1338 1339 case 'r': 1340 *p++ = '\r'; 1341 break; 1342 1343 case 'b': 1344 *p++ = '\b'; 1345 break; 1346 1347 case 't': 1348 *p++ = '\t'; 1349 break; 1350 1351 case 'f': 1352 *p++ = '\f'; 1353 break; 1354 1355 case 'v': 1356 *p++ = '\v'; 1357 break; 1358 1359 /* \ and up to 3 octal digits */ 1360 case '0': 1361 case '1': 1362 case '2': 1363 case '3': 1364 case '4': 1365 case '5': 1366 case '6': 1367 case '7': 1368 val = c - '0'; 1369 c = *s++; /* try for 2 */ 1370 if (c >= '0' && c <= '7') { 1371 val = (val << 3) | (c - '0'); 1372 c = *s++; /* try for 3 */ 1373 if (c >= '0' && c <= '7') 1374 val = (val << 3) | (c - '0'); 1375 else 1376 --s; 1377 } 1378 else 1379 --s; 1380 *p++ = (char) val; 1381 break; 1382 1383 /* \x and up to 3 hex digits */ 1384 case 'x': 1385 val = 'x'; /* Default 
if no digits */ 1386 c = hextoint(*s++); /* Get next char */ 1387 if (c >= 0) { 1388 val = c; 1389 c = hextoint(*s++); 1390 if (c >= 0) { 1391 val = (val << 4) + c; 1392 c = hextoint(*s++); 1393 if (c >= 0) { 1394 val = (val << 4) + c; 1395 } 1396 else 1397 --s; 1398 } 1399 else 1400 --s; 1401 } 1402 else 1403 --s; 1404 *p++ = (char) val; 1405 break; 1406 } 1407 } 1408 else 1409 *p++ = (char) c; 1410 } 1411 out: 1412 *p = '\0'; 1413 *slen = p - origp; 1414 return s; 1415} 1416 1417 1418/* Single hex char to int; -1 if not a hex char. */ 1419static int hextoint(int c) 1420{ 1421 if (apr_isdigit(c)) 1422 return c - '0'; 1423 if ((c >= 'a') && (c <= 'f')) 1424 return c + 10 - 'a'; 1425 if ((c >= 'A') && (c <= 'F')) 1426 return c + 10 - 'A'; 1427 return -1; 1428} 1429 1430 1431/* 1432 * return DONE to indicate it's been handled 1433 * return OK to indicate it's a regular file still needing handling 1434 * other returns indicate a failure of some sort 1435 */ 1436static int fsmagic(request_rec *r, const char *fn) 1437{ 1438 switch (r->finfo.filetype) { 1439 case APR_DIR: 1440 magic_rsl_puts(r, DIR_MAGIC_TYPE); 1441 return DONE; 1442 case APR_CHR: 1443 /* 1444 * (void) magic_rsl_printf(r,"character special (%d/%d)", 1445 * major(sb->st_rdev), minor(sb->st_rdev)); 1446 */ 1447 (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN); 1448 return DONE; 1449 case APR_BLK: 1450 /* 1451 * (void) magic_rsl_printf(r,"block special (%d/%d)", 1452 * major(sb->st_rdev), minor(sb->st_rdev)); 1453 */ 1454 (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN); 1455 return DONE; 1456 /* TODO add code to handle V7 MUX and Blit MUX files */ 1457 case APR_PIPE: 1458 /* 1459 * magic_rsl_puts(r,"fifo (named pipe)"); 1460 */ 1461 (void) magic_rsl_puts(r, MIME_BINARY_UNKNOWN); 1462 return DONE; 1463 case APR_LNK: 1464 /* We used stat(), the only possible reason for this is that the 1465 * symlink is broken. 
1466 */ 1467 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01527) 1468 MODNAME ": broken symlink (%s)", fn); 1469 return HTTP_INTERNAL_SERVER_ERROR; 1470 case APR_SOCK: 1471 magic_rsl_puts(r, MIME_BINARY_UNKNOWN); 1472 return DONE; 1473 case APR_REG: 1474 break; 1475 default: 1476 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01528) 1477 MODNAME ": invalid file type %d.", r->finfo.filetype); 1478 return HTTP_INTERNAL_SERVER_ERROR; 1479 } 1480 1481 /* 1482 * regular file, check next possibility 1483 */ 1484 if (r->finfo.size == 0) { 1485 magic_rsl_puts(r, MIME_TEXT_UNKNOWN); 1486 return DONE; 1487 } 1488 return OK; 1489} 1490 1491/* 1492 * softmagic - lookup one file in database (already read from /etc/magic by 1493 * apprentice.c). Passed the name and FILE * of one file to be typed. 1494 */ 1495 /* ARGSUSED1 *//* nbytes passed for regularity, maybe need later */ 1496static int softmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes) 1497{ 1498 if (match(r, buf, nbytes)) 1499 return 1; 1500 1501 return 0; 1502} 1503 1504/* 1505 * Go through the whole list, stopping if you find a match. Process all the 1506 * continuations of that match before returning. 1507 * 1508 * We support multi-level continuations: 1509 * 1510 * At any time when processing a successful top-level match, there is a current 1511 * continuation level; it represents the level of the last successfully 1512 * matched continuation. 1513 * 1514 * Continuations above that level are skipped as, if we see one, it means that 1515 * the continuation that controls them - i.e, the lower-level continuation 1516 * preceding them - failed to match. 1517 * 1518 * Continuations below that level are processed as, if we see one, it means 1519 * we've finished processing or skipping higher-level continuations under the 1520 * control of a successful or unsuccessful lower-level continuation, and are 1521 * now seeing the next lower-level continuation and should process it. 
The 1522 * current continuation level reverts to the level of the one we're seeing. 1523 * 1524 * Continuations at the current level are processed as, if we see one, there's 1525 * no lower-level continuation that may have failed. 1526 * 1527 * If a continuation matches, we bump the current continuation level so that 1528 * higher-level continuations are processed. 1529 */ 1530static int match(request_rec *r, unsigned char *s, apr_size_t nbytes) 1531{ 1532#if MIME_MAGIC_DEBUG 1533 int rule_counter = 0; 1534#endif 1535 int cont_level = 0; 1536 int need_separator = 0; 1537 union VALUETYPE p; 1538 magic_server_config_rec *conf = (magic_server_config_rec *) 1539 ap_get_module_config(r->server->module_config, &mime_magic_module); 1540 struct magic *m; 1541 1542#if MIME_MAGIC_DEBUG 1543 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01529) 1544 MODNAME ": match conf=%x file=%s m=%s m->next=%s last=%s", 1545 conf, 1546 conf->magicfile ? conf->magicfile : "NULL", 1547 conf->magic ? "set" : "NULL", 1548 (conf->magic && conf->magic->next) ? "set" : "NULL", 1549 conf->last ? "set" : "NULL"); 1550#endif 1551 1552#if MIME_MAGIC_DEBUG 1553 for (m = conf->magic; m; m = m->next) { 1554 if (apr_isprint((((unsigned long) m) >> 24) & 255) && 1555 apr_isprint((((unsigned long) m) >> 16) & 255) && 1556 apr_isprint((((unsigned long) m) >> 8) & 255) && 1557 apr_isprint(((unsigned long) m) & 255)) { 1558 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01530) 1559 MODNAME ": match: POINTER CLOBBERED! 
" 1560 "m=\"%c%c%c%c\"", 1561 (((unsigned long) m) >> 24) & 255, 1562 (((unsigned long) m) >> 16) & 255, 1563 (((unsigned long) m) >> 8) & 255, 1564 ((unsigned long) m) & 255); 1565 break; 1566 } 1567 } 1568#endif 1569 1570 for (m = conf->magic; m; m = m->next) { 1571#if MIME_MAGIC_DEBUG 1572 rule_counter++; 1573 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01531) 1574 MODNAME ": line=%d desc=%s", m->lineno, m->desc); 1575#endif 1576 1577 /* check if main entry matches */ 1578 if (!mget(r, &p, s, m, nbytes) || 1579 !mcheck(r, &p, m)) { 1580 struct magic *m_cont; 1581 1582 /* 1583 * main entry didn't match, flush its continuations 1584 */ 1585 if (!m->next || (m->next->cont_level == 0)) { 1586 continue; 1587 } 1588 1589 m_cont = m->next; 1590 while (m_cont && (m_cont->cont_level != 0)) { 1591#if MIME_MAGIC_DEBUG 1592 rule_counter++; 1593 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01532) 1594 MODNAME ": line=%d mc=%x mc->next=%x cont=%d desc=%s", 1595 m_cont->lineno, m_cont, 1596 m_cont->next, m_cont->cont_level, 1597 m_cont->desc); 1598#endif 1599 /* 1600 * this trick allows us to keep *m in sync when the continue 1601 * advances the pointer 1602 */ 1603 m = m_cont; 1604 m_cont = m_cont->next; 1605 } 1606 continue; 1607 } 1608 1609 /* if we get here, the main entry rule was a match */ 1610 /* this will be the last run through the loop */ 1611#if MIME_MAGIC_DEBUG 1612 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01533) 1613 MODNAME ": rule matched, line=%d type=%d %s", 1614 m->lineno, m->type, 1615 (m->type == STRING) ? m->value.s : ""); 1616#endif 1617 1618 /* print the match */ 1619 mprint(r, &p, m); 1620 1621 /* 1622 * If we printed something, we'll need to print a blank before we 1623 * print something else. 
1624 */ 1625 if (m->desc[0]) 1626 need_separator = 1; 1627 /* and any continuations that match */ 1628 cont_level++; 1629 /* 1630 * while (m && m->next && m->next->cont_level != 0 && ( m = m->next 1631 * )) 1632 */ 1633 m = m->next; 1634 while (m && (m->cont_level != 0)) { 1635#if MIME_MAGIC_DEBUG 1636 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01534) 1637 MODNAME ": match line=%d cont=%d type=%d %s", 1638 m->lineno, m->cont_level, m->type, 1639 (m->type == STRING) ? m->value.s : ""); 1640#endif 1641 if (cont_level >= m->cont_level) { 1642 if (cont_level > m->cont_level) { 1643 /* 1644 * We're at the end of the level "cont_level" 1645 * continuations. 1646 */ 1647 cont_level = m->cont_level; 1648 } 1649 if (mget(r, &p, s, m, nbytes) && 1650 mcheck(r, &p, m)) { 1651 /* 1652 * This continuation matched. Print its message, with a 1653 * blank before it if the previous item printed and this 1654 * item isn't empty. 1655 */ 1656 /* space if previous printed */ 1657 if (need_separator 1658 && (m->nospflag == 0) 1659 && (m->desc[0] != '\0') 1660 ) { 1661 (void) magic_rsl_putchar(r, ' '); 1662 need_separator = 0; 1663 } 1664 mprint(r, &p, m); 1665 if (m->desc[0]) 1666 need_separator = 1; 1667 1668 /* 1669 * If we see any continuations at a higher level, process 1670 * them. 
1671 */ 1672 cont_level++; 1673 } 1674 } 1675 1676 /* move to next continuation record */ 1677 m = m->next; 1678 } 1679#if MIME_MAGIC_DEBUG 1680 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01535) 1681 MODNAME ": matched after %d rules", rule_counter); 1682#endif 1683 return 1; /* all through */ 1684 } 1685#if MIME_MAGIC_DEBUG 1686 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01536) 1687 MODNAME ": failed after %d rules", rule_counter); 1688#endif 1689 return 0; /* no match at all */ 1690} 1691 1692static void mprint(request_rec *r, union VALUETYPE *p, struct magic *m) 1693{ 1694 char *pp; 1695 unsigned long v; 1696 char time_str[APR_CTIME_LEN]; 1697 1698 switch (m->type) { 1699 case BYTE: 1700 v = p->b; 1701 break; 1702 1703 case SHORT: 1704 case BESHORT: 1705 case LESHORT: 1706 v = p->h; 1707 break; 1708 1709 case LONG: 1710 case BELONG: 1711 case LELONG: 1712 v = p->l; 1713 break; 1714 1715 case STRING: 1716 if (m->reln == '=') { 1717 (void) magic_rsl_printf(r, m->desc, m->value.s); 1718 } 1719 else { 1720 (void) magic_rsl_printf(r, m->desc, p->s); 1721 } 1722 return; 1723 1724 case DATE: 1725 case BEDATE: 1726 case LEDATE: 1727 apr_ctime(time_str, apr_time_from_sec(*(time_t *)&p->l)); 1728 pp = time_str; 1729 (void) magic_rsl_printf(r, m->desc, pp); 1730 return; 1731 default: 1732 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01537) 1733 MODNAME ": invalid m->type (%d) in mprint().", 1734 m->type); 1735 return; 1736 } 1737 1738 v = signextend(r->server, m, v) & m->mask; 1739 (void) magic_rsl_printf(r, m->desc, (unsigned long) v); 1740} 1741 1742/* 1743 * Convert the byte order of the data we are looking at 1744 */ 1745static int mconvert(request_rec *r, union VALUETYPE *p, struct magic *m) 1746{ 1747 char *rt; 1748 1749 switch (m->type) { 1750 case BYTE: 1751 case SHORT: 1752 case LONG: 1753 case DATE: 1754 return 1; 1755 case STRING: 1756 /* Null terminate and eat the return */ 1757 p->s[sizeof(p->s) - 1] = '\0'; 1758 if ((rt = 
strchr(p->s, '\n')) != NULL) 1759 *rt = '\0'; 1760 return 1; 1761 case BESHORT: 1762 p->h = (short) ((p->hs[0] << 8) | (p->hs[1])); 1763 return 1; 1764 case BELONG: 1765 case BEDATE: 1766 p->l = (long) 1767 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3])); 1768 return 1; 1769 case LESHORT: 1770 p->h = (short) ((p->hs[1] << 8) | (p->hs[0])); 1771 return 1; 1772 case LELONG: 1773 case LEDATE: 1774 p->l = (long) 1775 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0])); 1776 return 1; 1777 default: 1778 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01538) 1779 MODNAME ": invalid type %d in mconvert().", m->type); 1780 return 0; 1781 } 1782} 1783 1784 1785static int mget(request_rec *r, union VALUETYPE *p, unsigned char *s, 1786 struct magic *m, apr_size_t nbytes) 1787{ 1788 long offset = m->offset; 1789 1790 if (offset + sizeof(union VALUETYPE) > nbytes) 1791 return 0; 1792 1793 memcpy(p, s + offset, sizeof(union VALUETYPE)); 1794 1795 if (!mconvert(r, p, m)) 1796 return 0; 1797 1798 if (m->flag & INDIR) { 1799 1800 switch (m->in.type) { 1801 case BYTE: 1802 offset = p->b + m->in.offset; 1803 break; 1804 case SHORT: 1805 offset = p->h + m->in.offset; 1806 break; 1807 case LONG: 1808 offset = p->l + m->in.offset; 1809 break; 1810 } 1811 1812 if (offset + sizeof(union VALUETYPE) > nbytes) 1813 return 0; 1814 1815 memcpy(p, s + offset, sizeof(union VALUETYPE)); 1816 1817 if (!mconvert(r, p, m)) 1818 return 0; 1819 } 1820 return 1; 1821} 1822 1823static int mcheck(request_rec *r, union VALUETYPE *p, struct magic *m) 1824{ 1825 register unsigned long l = m->value.l; 1826 register unsigned long v; 1827 int matched; 1828 1829 if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) { 1830 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01539) 1831 MODNAME ": BOINK"); 1832 return 1; 1833 } 1834 1835 switch (m->type) { 1836 case BYTE: 1837 v = p->b; 1838 break; 1839 1840 case SHORT: 1841 case BESHORT: 1842 case LESHORT: 1843 v = p->h; 
1844 break; 1845 1846 case LONG: 1847 case BELONG: 1848 case LELONG: 1849 case DATE: 1850 case BEDATE: 1851 case LEDATE: 1852 v = p->l; 1853 break; 1854 1855 case STRING: 1856 l = 0; 1857 /* 1858 * What we want here is: v = strncmp(m->value.s, p->s, m->vallen); 1859 * but ignoring any nulls. bcmp doesn't give -/+/0 and isn't 1860 * universally available anyway. 1861 */ 1862 v = 0; 1863 { 1864 register unsigned char *a = (unsigned char *) m->value.s; 1865 register unsigned char *b = (unsigned char *) p->s; 1866 register int len = m->vallen; 1867 1868 while (--len >= 0) 1869 if ((v = *b++ - *a++) != 0) 1870 break; 1871 } 1872 break; 1873 default: 1874 /* bogosity, pretend that it just wasn't a match */ 1875 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01540) 1876 MODNAME ": invalid type %d in mcheck().", m->type); 1877 return 0; 1878 } 1879 1880 v = signextend(r->server, m, v) & m->mask; 1881 1882 switch (m->reln) { 1883 case 'x': 1884#if MIME_MAGIC_DEBUG 1885 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01541) 1886 "%lu == *any* = 1", v); 1887#endif 1888 matched = 1; 1889 break; 1890 1891 case '!': 1892 matched = v != l; 1893#if MIME_MAGIC_DEBUG 1894 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01542) 1895 "%lu != %lu = %d", v, l, matched); 1896#endif 1897 break; 1898 1899 case '=': 1900 matched = v == l; 1901#if MIME_MAGIC_DEBUG 1902 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01543) 1903 "%lu == %lu = %d", v, l, matched); 1904#endif 1905 break; 1906 1907 case '>': 1908 if (m->flag & UNSIGNED) { 1909 matched = v > l; 1910#if MIME_MAGIC_DEBUG 1911 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01544) 1912 "%lu > %lu = %d", v, l, matched); 1913#endif 1914 } 1915 else { 1916 matched = (long) v > (long) l; 1917#if MIME_MAGIC_DEBUG 1918 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01545) 1919 "%ld > %ld = %d", v, l, matched); 1920#endif 1921 } 1922 break; 1923 1924 case '<': 1925 if (m->flag & UNSIGNED) { 1926 matched = v < 
l; 1927#if MIME_MAGIC_DEBUG 1928 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01546) 1929 "%lu < %lu = %d", v, l, matched); 1930#endif 1931 } 1932 else { 1933 matched = (long) v < (long) l; 1934#if MIME_MAGIC_DEBUG 1935 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01547) 1936 "%ld < %ld = %d", v, l, matched); 1937#endif 1938 } 1939 break; 1940 1941 case '&': 1942 matched = (v & l) == l; 1943#if MIME_MAGIC_DEBUG 1944 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01548) 1945 "((%lx & %lx) == %lx) = %d", v, l, l, matched); 1946#endif 1947 break; 1948 1949 case '^': 1950 matched = (v & l) != l; 1951#if MIME_MAGIC_DEBUG 1952 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01549) 1953 "((%lx & %lx) != %lx) = %d", v, l, l, matched); 1954#endif 1955 break; 1956 1957 default: 1958 /* bogosity, pretend it didn't match */ 1959 matched = 0; 1960 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01550) 1961 MODNAME ": mcheck: can't happen: invalid relation %d.", 1962 m->reln); 1963 break; 1964 } 1965 1966 return matched; 1967} 1968 1969/* an optimization over plain strcmp() */ 1970#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0) 1971 1972static int ascmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes) 1973{ 1974 int has_escapes = 0; 1975 unsigned char *s; 1976 char nbuf[SMALL_HOWMANY + 1]; /* one extra for terminating '\0' */ 1977 char *token; 1978 const struct names *p; 1979 int small_nbytes; 1980 char *strtok_state; 1981 1982 /* these are easy, do them first */ 1983 1984 /* 1985 * for troff, look for . + letter + letter or .\"; this must be done to 1986 * disambiguate tar archives' ./file and other trash from real troff 1987 * input. 
1988 */ 1989 if (*buf == '.') { 1990 unsigned char *tp = buf + 1; 1991 1992 while (apr_isspace(*tp)) 1993 ++tp; /* skip leading whitespace */ 1994 if ((apr_isalnum(*tp) || *tp == '\\') && 1995 (apr_isalnum(*(tp + 1)) || *tp == '"')) { 1996 magic_rsl_puts(r, "application/x-troff"); 1997 return 1; 1998 } 1999 } 2000 if ((*buf == 'c' || *buf == 'C') && apr_isspace(*(buf + 1))) { 2001 /* Fortran */ 2002 magic_rsl_puts(r, "text/plain"); 2003 return 1; 2004 } 2005 2006 /* look for tokens from names.h - this is expensive!, so we'll limit 2007 * ourselves to only SMALL_HOWMANY bytes */ 2008 small_nbytes = (nbytes > SMALL_HOWMANY) ? SMALL_HOWMANY : nbytes; 2009 /* make a copy of the buffer here because apr_strtok() will destroy it */ 2010 s = (unsigned char *) memcpy(nbuf, buf, small_nbytes); 2011 s[small_nbytes] = '\0'; 2012 has_escapes = (memchr(s, '\033', small_nbytes) != NULL); 2013 while ((token = apr_strtok((char *) s, " \t\n\r\f", &strtok_state)) != NULL) { 2014 s = NULL; /* make apr_strtok() keep on tokin' */ 2015 for (p = names; p < names + NNAMES; p++) { 2016 if (STREQ(p->name, token)) { 2017 magic_rsl_puts(r, types[p->type]); 2018 if (has_escapes) 2019 magic_rsl_puts(r, " (with escape sequences)"); 2020 return 1; 2021 } 2022 } 2023 } 2024 2025 switch (is_tar(buf, nbytes)) { 2026 case 1: 2027 /* V7 tar archive */ 2028 magic_rsl_puts(r, "application/x-tar"); 2029 return 1; 2030 case 2: 2031 /* POSIX tar archive */ 2032 magic_rsl_puts(r, "application/x-tar"); 2033 return 1; 2034 } 2035 2036 /* all else fails, but it is ascii... 
*/ 2037 return 0; 2038} 2039 2040 2041/* 2042 * compress routines: zmagic() - returns 0 if not recognized, uncompresses 2043 * and prints information if recognized uncompress(s, method, old, n, newch) 2044 * - uncompress old into new, using method, return sizeof new 2045 */ 2046 2047static struct { 2048 char *magic; 2049 apr_size_t maglen; 2050 char *argv[3]; 2051 int silent; 2052 char *encoding; /* MUST be lowercase */ 2053} compr[] = { 2054 2055 /* we use gzip here rather than uncompress because we have to pass 2056 * it a full filename -- and uncompress only considers filenames 2057 * ending with .Z 2058 */ 2059 { 2060 "\037\235", 2, { 2061 "gzip", "-dcq", NULL 2062 }, 0, "x-compress" 2063 }, 2064 { 2065 "\037\213", 2, { 2066 "gzip", "-dcq", NULL 2067 }, 1, "x-gzip" 2068 }, 2069 /* 2070 * XXX pcat does not work, cause I don't know how to make it read stdin, 2071 * so we use gzip 2072 */ 2073 { 2074 "\037\036", 2, { 2075 "gzip", "-dcq", NULL 2076 }, 0, "x-gzip" 2077 }, 2078}; 2079 2080static int ncompr = sizeof(compr) / sizeof(compr[0]); 2081 2082static int zmagic(request_rec *r, unsigned char *buf, apr_size_t nbytes) 2083{ 2084 unsigned char *newbuf; 2085 int newsize; 2086 int i; 2087 2088 for (i = 0; i < ncompr; i++) { 2089 if (nbytes < compr[i].maglen) 2090 continue; 2091 if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0) 2092 break; 2093 } 2094 2095 if (i == ncompr) 2096 return 0; 2097 2098 if ((newsize = uncompress(r, i, &newbuf, HOWMANY)) > 0) { 2099 /* set encoding type in the request record */ 2100 r->content_encoding = compr[i].encoding; 2101 2102 newbuf[newsize-1] = '\0'; /* null-terminate uncompressed data */ 2103 /* Try to detect the content type of the uncompressed data */ 2104 if (tryit(r, newbuf, newsize, 0) != OK) { 2105 return 0; 2106 } 2107 } 2108 return 1; 2109} 2110 2111 2112struct uncompress_parms { 2113 request_rec *r; 2114 int method; 2115}; 2116 2117static int create_uncompress_child(struct uncompress_parms *parm, apr_pool_t *cntxt, 
2118 apr_file_t **pipe_in) 2119{ 2120 int rc = 1; 2121 const char *new_argv[4]; 2122 request_rec *r = parm->r; 2123 apr_pool_t *child_context = cntxt; 2124 apr_procattr_t *procattr; 2125 apr_proc_t *procnew; 2126 2127 /* XXX missing 1.3 logic: 2128 * 2129 * what happens when !compr[parm->method].silent? 2130 * Should we create the err pipe, read it, and copy to the log? 2131 */ 2132 2133 if ((apr_procattr_create(&procattr, child_context) != APR_SUCCESS) || 2134 (apr_procattr_io_set(procattr, APR_FULL_BLOCK, 2135 APR_FULL_BLOCK, APR_NO_PIPE) != APR_SUCCESS) || 2136 (apr_procattr_dir_set(procattr, 2137 ap_make_dirstr_parent(r->pool, r->filename)) != APR_SUCCESS) || 2138 (apr_procattr_cmdtype_set(procattr, APR_PROGRAM_PATH) != APR_SUCCESS)) { 2139 /* Something bad happened, tell the world. */ 2140 ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOPROC, r, APLOGNO(01551) 2141 "couldn't setup child process: %s", r->filename); 2142 } 2143 else { 2144 new_argv[0] = compr[parm->method].argv[0]; 2145 new_argv[1] = compr[parm->method].argv[1]; 2146 new_argv[2] = r->filename; 2147 new_argv[3] = NULL; 2148 2149 procnew = apr_pcalloc(child_context, sizeof(*procnew)); 2150 rc = apr_proc_create(procnew, compr[parm->method].argv[0], 2151 new_argv, NULL, procattr, child_context); 2152 2153 if (rc != APR_SUCCESS) { 2154 /* Bad things happened. Everyone should have cleaned up. 
*/ 2155 ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_ENOPROC, r, APLOGNO(01552) 2156 MODNAME ": could not execute `%s'.", 2157 compr[parm->method].argv[0]); 2158 } 2159 else { 2160 apr_pool_note_subprocess(child_context, procnew, APR_KILL_AFTER_TIMEOUT); 2161 *pipe_in = procnew->out; 2162 } 2163 } 2164 2165 return (rc); 2166} 2167 2168static int uncompress(request_rec *r, int method, 2169 unsigned char **newch, apr_size_t n) 2170{ 2171 struct uncompress_parms parm; 2172 apr_file_t *pipe_out = NULL; 2173 apr_pool_t *sub_context; 2174 apr_status_t rv; 2175 2176 parm.r = r; 2177 parm.method = method; 2178 2179 /* We make a sub_pool so that we can collect our child early, otherwise 2180 * there are cases (i.e. generating directory indicies with mod_autoindex) 2181 * where we would end up with LOTS of zombies. 2182 */ 2183 if (apr_pool_create(&sub_context, r->pool) != APR_SUCCESS) 2184 return -1; 2185 2186 if ((rv = create_uncompress_child(&parm, sub_context, &pipe_out)) != APR_SUCCESS) { 2187 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01553) 2188 MODNAME ": couldn't spawn uncompress process: %s", r->uri); 2189 return -1; 2190 } 2191 2192 *newch = (unsigned char *) apr_palloc(r->pool, n); 2193 rv = apr_file_read(pipe_out, *newch, &n); 2194 if (n == 0) { 2195 apr_pool_destroy(sub_context); 2196 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01554) 2197 MODNAME ": read failed from uncompress of %s", r->filename); 2198 return -1; 2199 } 2200 apr_pool_destroy(sub_context); 2201 return n; 2202} 2203 2204/* 2205 * is_tar() -- figure out whether file is a tar archive. 2206 * 2207 * Stolen (by author of file utility) from the public domain tar program: Public 2208 * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu). 2209 * 2210 * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7 2211 * 1997/06/24 00:41:02 ikluft Exp ikluft $ 2212 * 2213 * Comments changed and some code/comments reformatted for file command by Ian 2214 * Darwin. 
2215 */ 2216 2217#define isodigit(c) (((unsigned char)(c) >= '0') && ((unsigned char)(c) <= '7')) 2218 2219/* 2220 * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for 2221 * old UNIX tar file, 2 for Unix Std (POSIX) tar file. 2222 */ 2223 2224static int is_tar(unsigned char *buf, apr_size_t nbytes) 2225{ 2226 register union record *header = (union record *) buf; 2227 register int i; 2228 register long sum, recsum; 2229 register char *p; 2230 2231 if (nbytes < sizeof(union record)) 2232 return 0; 2233 2234 recsum = from_oct(8, header->header.chksum); 2235 2236 sum = 0; 2237 p = header->charptr; 2238 for (i = sizeof(union record); --i >= 0;) { 2239 /* 2240 * We can't use unsigned char here because of old compilers, e.g. V7. 2241 */ 2242 sum += 0xFF & *p++; 2243 } 2244 2245 /* Adjust checksum to count the "chksum" field as blanks. */ 2246 for (i = sizeof(header->header.chksum); --i >= 0;) 2247 sum -= 0xFF & header->header.chksum[i]; 2248 sum += ' ' * sizeof header->header.chksum; 2249 2250 if (sum != recsum) 2251 return 0; /* Not a tar archive */ 2252 2253 if (0 == strcmp(header->header.magic, TMAGIC)) 2254 return 2; /* Unix Standard tar archive */ 2255 2256 return 1; /* Old fashioned tar archive */ 2257} 2258 2259 2260/* 2261 * Quick and dirty octal conversion. 2262 * 2263 * Result is -1 if the field is invalid (all blank, or nonoctal). 
2264 */ 2265static long from_oct(int digs, char *where) 2266{ 2267 register long value; 2268 2269 while (apr_isspace(*where)) { /* Skip spaces */ 2270 where++; 2271 if (--digs <= 0) 2272 return -1; /* All blank field */ 2273 } 2274 value = 0; 2275 while (digs > 0 && isodigit(*where)) { /* Scan til nonoctal */ 2276 value = (value << 3) | (*where++ - '0'); 2277 --digs; 2278 } 2279 2280 if (digs > 0 && *where && !apr_isspace(*where)) 2281 return -1; /* Ended on non-space/nul */ 2282 2283 return value; 2284} 2285 2286/* 2287 * Check for file-revision suffix 2288 * 2289 * This is for an obscure document control system used on an intranet. 2290 * The web representation of each file's revision has an @1, @2, etc 2291 * appended with the revision number. This needs to be stripped off to 2292 * find the file suffix, which can be recognized by sending the name back 2293 * through a sub-request. The base file name (without the @num suffix) 2294 * must exist because its type will be used as the result. 
2295 */ 2296static int revision_suffix(request_rec *r) 2297{ 2298 int suffix_pos, result; 2299 char *sub_filename; 2300 request_rec *sub; 2301 2302#if MIME_MAGIC_DEBUG 2303 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01555) 2304 MODNAME ": revision_suffix checking %s", r->filename); 2305#endif /* MIME_MAGIC_DEBUG */ 2306 2307 /* check for recognized revision suffix */ 2308 suffix_pos = strlen(r->filename) - 1; 2309 if (!apr_isdigit(r->filename[suffix_pos])) { 2310 return 0; 2311 } 2312 while (suffix_pos >= 0 && apr_isdigit(r->filename[suffix_pos])) 2313 suffix_pos--; 2314 if (suffix_pos < 0 || r->filename[suffix_pos] != '@') { 2315 return 0; 2316 } 2317 2318 /* perform sub-request for the file name without the suffix */ 2319 result = 0; 2320 sub_filename = apr_pstrndup(r->pool, r->filename, suffix_pos); 2321#if MIME_MAGIC_DEBUG 2322 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01556) 2323 MODNAME ": subrequest lookup for %s", sub_filename); 2324#endif /* MIME_MAGIC_DEBUG */ 2325 sub = ap_sub_req_lookup_file(sub_filename, r, NULL); 2326 2327 /* extract content type/encoding/language from sub-request */ 2328 if (sub->content_type) { 2329 ap_set_content_type(r, apr_pstrdup(r->pool, sub->content_type)); 2330#if MIME_MAGIC_DEBUG 2331 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01557) 2332 MODNAME ": subrequest %s got %s", 2333 sub_filename, r->content_type); 2334#endif /* MIME_MAGIC_DEBUG */ 2335 if (sub->content_encoding) 2336 r->content_encoding = 2337 apr_pstrdup(r->pool, sub->content_encoding); 2338 if (sub->content_languages) { 2339 int n; 2340 r->content_languages = apr_array_copy(r->pool, 2341 sub->content_languages); 2342 for (n = 0; n < r->content_languages->nelts; ++n) { 2343 char **lang = ((char **)r->content_languages->elts) + n; 2344 *lang = apr_pstrdup(r->pool, *lang); 2345 } 2346 } 2347 result = 1; 2348 } 2349 2350 /* clean up */ 2351 ap_destroy_sub_req(sub); 2352 2353 return result; 2354} 2355 2356/* 2357 * initialize the module 
2358 */ 2359static int magic_init(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *main_server) 2360{ 2361 int result; 2362 magic_server_config_rec *conf; 2363 magic_server_config_rec *main_conf; 2364 server_rec *s; 2365#if MIME_MAGIC_DEBUG 2366 struct magic *m, *prevm; 2367#endif /* MIME_MAGIC_DEBUG */ 2368 2369 main_conf = ap_get_module_config(main_server->module_config, &mime_magic_module); 2370 for (s = main_server; s; s = s->next) { 2371 conf = ap_get_module_config(s->module_config, &mime_magic_module); 2372 if (conf->magicfile == NULL && s != main_server) { 2373 /* inherits from the parent */ 2374 *conf = *main_conf; 2375 } 2376 else if (conf->magicfile) { 2377 result = apprentice(s, p); 2378 if (result == -1) 2379 return OK; 2380#if MIME_MAGIC_DEBUG 2381 prevm = 0; 2382 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01558) 2383 MODNAME ": magic_init 1 test"); 2384 for (m = conf->magic; m; m = m->next) { 2385 if (apr_isprint((((unsigned long) m) >> 24) & 255) && 2386 apr_isprint((((unsigned long) m) >> 16) & 255) && 2387 apr_isprint((((unsigned long) m) >> 8) & 255) && 2388 apr_isprint(((unsigned long) m) & 255)) { 2389 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s, APLOGNO(01559) 2390 MODNAME ": magic_init 1: POINTER CLOBBERED! " 2391 "m=\"%c%c%c%c\" line=%d", 2392 (((unsigned long) m) >> 24) & 255, 2393 (((unsigned long) m) >> 16) & 255, 2394 (((unsigned long) m) >> 8) & 255, 2395 ((unsigned long) m) & 255, 2396 prevm ? prevm->lineno : -1); 2397 break; 2398 } 2399 prevm = m; 2400 } 2401#endif 2402 } 2403 } 2404 return OK; 2405} 2406 2407/* 2408 * Find the Content-Type from any resource this module has available 2409 */ 2410 2411static int magic_find_ct(request_rec *r) 2412{ 2413 int result; 2414 magic_server_config_rec *conf; 2415 2416 /* the file has to exist */ 2417 if (r->finfo.filetype == APR_NOFILE || !r->filename) { 2418 return DECLINED; 2419 } 2420 2421 /* was someone else already here? 
*/ 2422 if (r->content_type) { 2423 return DECLINED; 2424 } 2425 2426 conf = ap_get_module_config(r->server->module_config, &mime_magic_module); 2427 if (!conf || !conf->magic) { 2428 return DECLINED; 2429 } 2430 2431 /* initialize per-request info */ 2432 if (!magic_set_config(r)) { 2433 return HTTP_INTERNAL_SERVER_ERROR; 2434 } 2435 2436 /* try excluding file-revision suffixes */ 2437 if (revision_suffix(r) != 1) { 2438 /* process it based on the file contents */ 2439 if ((result = magic_process(r)) != OK) { 2440 return result; 2441 } 2442 } 2443 2444 /* if we have any results, put them in the request structure */ 2445 return magic_rsl_to_request(r); 2446} 2447 2448static void register_hooks(apr_pool_t *p) 2449{ 2450 static const char * const aszPre[]={ "mod_mime.c", NULL }; 2451 2452 /* mod_mime_magic should be run after mod_mime, if at all. */ 2453 2454 ap_hook_type_checker(magic_find_ct, aszPre, NULL, APR_HOOK_MIDDLE); 2455 ap_hook_post_config(magic_init, NULL, NULL, APR_HOOK_FIRST); 2456} 2457 2458/* 2459 * Apache API module interface 2460 */ 2461 2462AP_DECLARE_MODULE(mime_magic) = 2463{ 2464 STANDARD20_MODULE_STUFF, 2465 NULL, /* dir config creator */ 2466 NULL, /* dir merger --- default is to override */ 2467 create_magic_server_config, /* server config */ 2468 merge_magic_server_config, /* merge server config */ 2469 mime_magic_cmds, /* command apr_table_t */ 2470 register_hooks /* register hooks */ 2471}; 2472