/* $NetBSD: apprentice.c,v 1.6 2012/02/22 17:53:51 christos Exp $ */

/*
 * Copyright (c) Ian F. Darwin 1986-1995.
 * Software written by Ian F. Darwin and others;
 * maintained 1995-present by Christos Zoulas and others.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * apprentice - make one pass through /etc/magic, learning its secrets.
32 */ 33 34#include "file.h" 35 36#ifndef lint 37#if 0 38FILE_RCSID("@(#)$File: apprentice.c,v 1.173 2011/12/08 12:38:24 rrt Exp $") 39#else 40__RCSID("$NetBSD: apprentice.c,v 1.6 2012/02/22 17:53:51 christos Exp $"); 41#endif 42#endif /* lint */ 43 44#include "magic.h" 45#include <stdlib.h> 46#ifdef HAVE_UNISTD_H 47#include <unistd.h> 48#endif 49#include <string.h> 50#include <assert.h> 51#include <ctype.h> 52#include <fcntl.h> 53#ifdef QUICK 54#include <sys/mman.h> 55#endif 56#include <dirent.h> 57 58#define EATAB {while (isascii((unsigned char) *l) && \ 59 isspace((unsigned char) *l)) ++l;} 60#define LOWCASE(l) (isupper((unsigned char) (l)) ? \ 61 tolower((unsigned char) (l)) : (l)) 62/* 63 * Work around a bug in headers on Digital Unix. 64 * At least confirmed for: OSF1 V4.0 878 65 */ 66#if defined(__osf__) && defined(__DECC) 67#ifdef MAP_FAILED 68#undef MAP_FAILED 69#endif 70#endif 71 72#ifndef MAP_FAILED 73#define MAP_FAILED (void *) -1 74#endif 75 76#ifndef MAP_FILE 77#define MAP_FILE 0 78#endif 79 80struct magic_entry { 81 struct magic *mp; 82 uint32_t cont_count; 83 uint32_t max_count; 84}; 85 86int file_formats[FILE_NAMES_SIZE]; 87const size_t file_nformats = FILE_NAMES_SIZE; 88const char *file_names[FILE_NAMES_SIZE]; 89const size_t file_nnames = FILE_NAMES_SIZE; 90 91private int getvalue(struct magic_set *ms, struct magic *, const char **, int); 92private int hextoint(int); 93private const char *getstr(struct magic_set *, struct magic *, const char *, 94 int); 95private int parse(struct magic_set *, struct magic_entry **, uint32_t *, 96 const char *, size_t, int); 97private void eatsize(const char **); 98private int apprentice_1(struct magic_set *, const char *, int, struct mlist *); 99private size_t apprentice_magic_strength(const struct magic *); 100private int apprentice_sort(const void *, const void *); 101private void apprentice_list(struct mlist *, int ); 102private int apprentice_load(struct magic_set *, struct magic **, uint32_t *, 103 const char 
*, int); 104private void byteswap(struct magic *, uint32_t); 105private void bs1(struct magic *); 106private uint16_t swap2(uint16_t); 107private uint32_t swap4(uint32_t); 108private uint64_t swap8(uint64_t); 109private char *mkdbname(struct magic_set *, const char *, int); 110private int apprentice_map(struct magic_set *, struct magic **, uint32_t *, 111 const char *); 112private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *, 113 const char *); 114private int check_format_type(const char *, int); 115private int check_format(struct magic_set *, struct magic *); 116private int get_op(char); 117private int parse_mime(struct magic_set *, struct magic_entry *, const char *); 118private int parse_strength(struct magic_set *, struct magic_entry *, const char *); 119private int parse_apple(struct magic_set *, struct magic_entry *, const char *); 120 121 122private size_t maxmagic = 0; 123private size_t magicsize = sizeof(struct magic); 124 125private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; 126 127private struct { 128 const char *name; 129 size_t len; 130 int (*fun)(struct magic_set *, struct magic_entry *, const char *); 131} bang[] = { 132#define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name } 133 DECLARE_FIELD(mime), 134 DECLARE_FIELD(apple), 135 DECLARE_FIELD(strength), 136#undef DECLARE_FIELD 137 { NULL, 0, NULL } 138}; 139 140#ifdef COMPILE_ONLY 141 142int main(int, char *[]); 143 144int 145main(int argc, char *argv[]) 146{ 147 int ret; 148 struct magic_set *ms; 149 char *progname; 150 151 if ((progname = strrchr(argv[0], '/')) != NULL) 152 progname++; 153 else 154 progname = argv[0]; 155 156 if (argc != 2) { 157 (void)fprintf(stderr, "Usage: %s file\n", progname); 158 return 1; 159 } 160 161 if ((ms = magic_open(MAGIC_CHECK)) == NULL) { 162 (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno)); 163 return 1; 164 } 165 ret = magic_compile(ms, argv[1]) == -1 ? 
1 : 0; 166 if (ret == 1) 167 (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms)); 168 magic_close(ms); 169 return ret; 170} 171#endif /* COMPILE_ONLY */ 172 173static const struct type_tbl_s { 174 const char name[16]; 175 const size_t len; 176 const int type; 177 const int format; 178} type_tbl[] = { 179# define XX(s) s, (sizeof(s) - 1) 180# define XX_NULL "", 0 181 { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, 182 { XX("short"), FILE_SHORT, FILE_FMT_NUM }, 183 { XX("default"), FILE_DEFAULT, FILE_FMT_STR }, 184 { XX("long"), FILE_LONG, FILE_FMT_NUM }, 185 { XX("string"), FILE_STRING, FILE_FMT_STR }, 186 { XX("date"), FILE_DATE, FILE_FMT_STR }, 187 { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, 188 { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, 189 { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, 190 { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, 191 { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, 192 { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, 193 { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, 194 { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, 195 { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, 196 { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, 197 { XX("regex"), FILE_REGEX, FILE_FMT_STR }, 198 { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, 199 { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, 200 { XX("search"), FILE_SEARCH, FILE_FMT_STR }, 201 { XX("medate"), FILE_MEDATE, FILE_FMT_STR }, 202 { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, 203 { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, 204 { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, 205 { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, 206 { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, 207 { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, 208 { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, 209 { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, 210 { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, 211 { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, 212 { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, 213 { XX("float"), FILE_FLOAT, 
FILE_FMT_FLOAT }, 214 { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, 215 { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, 216 { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, 217 { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE }, 218 { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, 219 { XX("leid3"), FILE_LEID3, FILE_FMT_NUM }, 220 { XX("beid3"), FILE_BEID3, FILE_FMT_NUM }, 221 { XX("indirect"), FILE_INDIRECT, FILE_FMT_NONE }, 222 { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, 223# undef XX 224# undef XX_NULL 225}; 226 227private int 228get_type(const char *l, const char **t) 229{ 230 const struct type_tbl_s *p; 231 232 for (p = type_tbl; p->len; p++) { 233 if (strncmp(l, p->name, p->len) == 0) { 234 if (t) 235 *t = l + p->len; 236 break; 237 } 238 } 239 return p->type; 240} 241 242private void 243init_file_tables(void) 244{ 245 static int done = 0; 246 const struct type_tbl_s *p; 247 248 if (done) 249 return; 250 done++; 251 252 for (p = type_tbl; p->len; p++) { 253 assert(p->type < FILE_NAMES_SIZE); 254 file_names[p->type] = p->name; 255 file_formats[p->type] = p->format; 256 } 257} 258 259/* 260 * Handle one file or directory. 
261 */ 262private int 263apprentice_1(struct magic_set *ms, const char *fn, int action, 264 struct mlist *mlist) 265{ 266 struct magic *magic = NULL; 267 uint32_t nmagic = 0; 268 struct mlist *ml; 269 int rv = -1; 270 int mapped; 271 272 if (magicsize != FILE_MAGICSIZE) { 273 file_error(ms, 0, "magic element size %lu != %lu", 274 (unsigned long)sizeof(*magic), 275 (unsigned long)FILE_MAGICSIZE); 276 return -1; 277 } 278 279 if (action == FILE_COMPILE) { 280 rv = apprentice_load(ms, &magic, &nmagic, fn, action); 281 if (rv != 0) 282 return -1; 283 rv = apprentice_compile(ms, &magic, &nmagic, fn); 284 free(magic); 285 return rv; 286 } 287 288#ifndef COMPILE_ONLY 289 if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) { 290 if (ms->flags & MAGIC_CHECK) 291 file_magwarn(ms, "using regular magic file `%s'", fn); 292 rv = apprentice_load(ms, &magic, &nmagic, fn, action); 293 if (rv != 0) 294 return -1; 295 } 296 297 mapped = rv; 298 299 if (magic == NULL) { 300 file_delmagic(magic, mapped, nmagic); 301 return -1; 302 } 303 304 if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) { 305 file_delmagic(magic, mapped, nmagic); 306 file_oomem(ms, sizeof(*ml)); 307 return -1; 308 } 309 310 ml->magic = magic; 311 ml->nmagic = nmagic; 312 ml->mapped = mapped; 313 314 mlist->prev->next = ml; 315 ml->prev = mlist->prev; 316 ml->next = mlist; 317 mlist->prev = ml; 318 319 if (action == FILE_LIST) { 320 printf("Binary patterns:\n"); 321 apprentice_list(mlist, BINTEST); 322 printf("Text patterns:\n"); 323 apprentice_list(mlist, TEXTTEST); 324 } 325 326 return 0; 327#endif /* COMPILE_ONLY */ 328} 329 330protected void 331file_delmagic(struct magic *p, int type, size_t entries) 332{ 333 if (p == NULL) 334 return; 335 switch (type) { 336 case 2: 337#ifdef QUICK 338 p--; 339 (void)munmap((void *)p, sizeof(*p) * (entries + 1)); 340 break; 341#else 342 (void)&entries; 343 abort(); 344 /*NOTREACHED*/ 345#endif 346 case 1: 347 p--; 348 /*FALLTHROUGH*/ 349 case 0: 350 free(p); 
351 break; 352 default: 353 abort(); 354 } 355} 356 357/* const char *fn: list of magic files and directories */ 358protected struct mlist * 359file_apprentice(struct magic_set *ms, const char *fn, int action) 360{ 361 char *p, *mfn; 362 int file_err, errs = -1; 363 struct mlist *mlist; 364 365 if ((fn = magic_getpath(fn, action)) == NULL) 366 return NULL; 367 368 init_file_tables(); 369 370 if ((mfn = strdup(fn)) == NULL) { 371 file_oomem(ms, strlen(fn)); 372 return NULL; 373 } 374 fn = mfn; 375 376 if ((mlist = CAST(struct mlist *, malloc(sizeof(*mlist)))) == NULL) { 377 free(mfn); 378 file_oomem(ms, sizeof(*mlist)); 379 return NULL; 380 } 381 mlist->next = mlist->prev = mlist; 382 383 while (fn) { 384 p = strchr(fn, PATHSEP); 385 if (p) 386 *p++ = '\0'; 387 if (*fn == '\0') 388 break; 389 file_err = apprentice_1(ms, fn, action, mlist); 390 errs = MAX(errs, file_err); 391 fn = p; 392 } 393 if (errs == -1) { 394 free(mfn); 395 free(mlist); 396 mlist = NULL; 397 file_error(ms, 0, "could not find any magic files!"); 398 return NULL; 399 } 400 free(mfn); 401 return mlist; 402} 403 404/* 405 * Get weight of this magic entry, for sorting purposes. 
406 */ 407private size_t 408apprentice_magic_strength(const struct magic *m) 409{ 410#define MULT 10 411 size_t val = 2 * MULT; /* baseline strength */ 412 413 switch (m->type) { 414 case FILE_DEFAULT: /* make sure this sorts last */ 415 if (m->factor_op != FILE_FACTOR_OP_NONE) 416 abort(); 417 return 0; 418 419 case FILE_BYTE: 420 val += 1 * MULT; 421 break; 422 423 case FILE_SHORT: 424 case FILE_LESHORT: 425 case FILE_BESHORT: 426 val += 2 * MULT; 427 break; 428 429 case FILE_LONG: 430 case FILE_LELONG: 431 case FILE_BELONG: 432 case FILE_MELONG: 433 val += 4 * MULT; 434 break; 435 436 case FILE_PSTRING: 437 case FILE_STRING: 438 val += m->vallen * MULT; 439 break; 440 441 case FILE_BESTRING16: 442 case FILE_LESTRING16: 443 val += m->vallen * MULT / 2; 444 break; 445 446 case FILE_SEARCH: 447 case FILE_REGEX: 448 val += m->vallen * MAX(MULT / m->vallen, 1); 449 break; 450 451 case FILE_DATE: 452 case FILE_LEDATE: 453 case FILE_BEDATE: 454 case FILE_MEDATE: 455 case FILE_LDATE: 456 case FILE_LELDATE: 457 case FILE_BELDATE: 458 case FILE_MELDATE: 459 case FILE_FLOAT: 460 case FILE_BEFLOAT: 461 case FILE_LEFLOAT: 462 val += 4 * MULT; 463 break; 464 465 case FILE_QUAD: 466 case FILE_BEQUAD: 467 case FILE_LEQUAD: 468 case FILE_QDATE: 469 case FILE_LEQDATE: 470 case FILE_BEQDATE: 471 case FILE_QLDATE: 472 case FILE_LEQLDATE: 473 case FILE_BEQLDATE: 474 case FILE_DOUBLE: 475 case FILE_BEDOUBLE: 476 case FILE_LEDOUBLE: 477 val += 8 * MULT; 478 break; 479 480 default: 481 val = 0; 482 (void)fprintf(stderr, "Bad type %d\n", m->type); 483 abort(); 484 } 485 486 switch (m->reln) { 487 case 'x': /* matches anything penalize */ 488 case '!': /* matches almost anything penalize */ 489 val = 0; 490 break; 491 492 case '=': /* Exact match, prefer */ 493 val += MULT; 494 break; 495 496 case '>': 497 case '<': /* comparison match reduce strength */ 498 val -= 2 * MULT; 499 break; 500 501 case '^': 502 case '&': /* masking bits, we could count them too */ 503 val -= MULT; 504 break; 
505 506 default: 507 (void)fprintf(stderr, "Bad relation %c\n", m->reln); 508 abort(); 509 } 510 511 if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */ 512 val = 1; 513 514 switch (m->factor_op) { 515 case FILE_FACTOR_OP_NONE: 516 break; 517 case FILE_FACTOR_OP_PLUS: 518 val += m->factor; 519 break; 520 case FILE_FACTOR_OP_MINUS: 521 val -= m->factor; 522 break; 523 case FILE_FACTOR_OP_TIMES: 524 val *= m->factor; 525 break; 526 case FILE_FACTOR_OP_DIV: 527 val /= m->factor; 528 break; 529 default: 530 abort(); 531 } 532 533 /* 534 * Magic entries with no description get a bonus because they depend 535 * on subsequent magic entries to print something. 536 */ 537 if (m->desc[0] == '\0') 538 val++; 539 return val; 540} 541 542/* 543 * Sort callback for sorting entries by "strength" (basically length) 544 */ 545private int 546apprentice_sort(const void *a, const void *b) 547{ 548 const struct magic_entry *ma = CAST(const struct magic_entry *, a); 549 const struct magic_entry *mb = CAST(const struct magic_entry *, b); 550 size_t sa = apprentice_magic_strength(ma->mp); 551 size_t sb = apprentice_magic_strength(mb->mp); 552 if (sa == sb) 553 return 0; 554 else if (sa > sb) 555 return -1; 556 else 557 return 1; 558} 559 560/* 561 * Shows sorted patterns list in the order which is used for the matching 562 */ 563private void 564apprentice_list(struct mlist *mlist, int mode) 565{ 566 uint32_t magindex = 0; 567 struct mlist *ml; 568 for (ml = mlist->next; ml != mlist; ml = ml->next) { 569 for (magindex = 0; magindex < ml->nmagic; magindex++) { 570 struct magic *m = &ml->magic[magindex]; 571 if ((m->flag & mode) != mode) { 572 /* Skip sub-tests */ 573 while (magindex + 1 < ml->nmagic && 574 ml->magic[magindex + 1].cont_level != 0) 575 ++magindex; 576 continue; /* Skip to next top-level test*/ 577 } 578 579 /* 580 * Try to iterate over the tree until we find item with 581 * description/mimetype. 
582 */ 583 while (magindex + 1 < ml->nmagic && 584 ml->magic[magindex + 1].cont_level != 0 && 585 *ml->magic[magindex].desc == '\0' && 586 *ml->magic[magindex].mimetype == '\0') 587 magindex++; 588 589 printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n", 590 apprentice_magic_strength(m), 591 ml->magic[magindex].desc, 592 ml->magic[magindex].mimetype); 593 } 594 } 595} 596 597private void 598set_test_type(struct magic *mstart, struct magic *m) 599{ 600 switch (m->type) { 601 case FILE_BYTE: 602 case FILE_SHORT: 603 case FILE_LONG: 604 case FILE_DATE: 605 case FILE_BESHORT: 606 case FILE_BELONG: 607 case FILE_BEDATE: 608 case FILE_LESHORT: 609 case FILE_LELONG: 610 case FILE_LEDATE: 611 case FILE_LDATE: 612 case FILE_BELDATE: 613 case FILE_LELDATE: 614 case FILE_MEDATE: 615 case FILE_MELDATE: 616 case FILE_MELONG: 617 case FILE_QUAD: 618 case FILE_LEQUAD: 619 case FILE_BEQUAD: 620 case FILE_QDATE: 621 case FILE_LEQDATE: 622 case FILE_BEQDATE: 623 case FILE_QLDATE: 624 case FILE_LEQLDATE: 625 case FILE_BEQLDATE: 626 case FILE_FLOAT: 627 case FILE_BEFLOAT: 628 case FILE_LEFLOAT: 629 case FILE_DOUBLE: 630 case FILE_BEDOUBLE: 631 case FILE_LEDOUBLE: 632 mstart->flag |= BINTEST; 633 break; 634 case FILE_STRING: 635 case FILE_PSTRING: 636 case FILE_BESTRING16: 637 case FILE_LESTRING16: 638 /* Allow text overrides */ 639 if (mstart->str_flags & STRING_TEXTTEST) 640 mstart->flag |= TEXTTEST; 641 else 642 mstart->flag |= BINTEST; 643 break; 644 case FILE_REGEX: 645 case FILE_SEARCH: 646 /* Check for override */ 647 if (mstart->str_flags & STRING_BINTEST) 648 mstart->flag |= BINTEST; 649 if (mstart->str_flags & STRING_TEXTTEST) 650 mstart->flag |= TEXTTEST; 651 652 if (mstart->flag & (TEXTTEST|BINTEST)) 653 break; 654 655 /* binary test if pattern is not text */ 656 if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL, 657 NULL) <= 0) 658 mstart->flag |= BINTEST; 659 else 660 mstart->flag |= TEXTTEST; 661 break; 662 case FILE_DEFAULT: 663 /* can't deduce anything; we 
shouldn't see this at the 664 top level anyway */ 665 break; 666 case FILE_INVALID: 667 default: 668 /* invalid search type, but no need to complain here */ 669 break; 670 } 671} 672 673/* 674 * Load and parse one file. 675 */ 676private void 677load_1(struct magic_set *ms, int action, const char *fn, int *errs, 678 struct magic_entry **marray, uint32_t *marraycount) 679{ 680 size_t lineno = 0, llen = 0; 681 char *line = NULL; 682 ssize_t len; 683 684 FILE *f = fopen(ms->file = fn, "r"); 685 if (f == NULL) { 686 if (errno != ENOENT) 687 file_error(ms, errno, "cannot read magic file `%s'", 688 fn); 689 (*errs)++; 690 return; 691 } 692 693 /* read and parse this file */ 694 for (ms->line = 1; (len = getline(&line, &llen, f)) != -1; 695 ms->line++) { 696 if (len == 0) /* null line, garbage, etc */ 697 continue; 698 if (line[len - 1] == '\n') { 699 lineno++; 700 line[len - 1] = '\0'; /* delete newline */ 701 } 702 switch (line[0]) { 703 case '\0': /* empty, do not parse */ 704 case '#': /* comment, do not parse */ 705 continue; 706 case '!': 707 if (line[1] == ':') { 708 size_t i; 709 710 for (i = 0; bang[i].name != NULL; i++) { 711 if ((size_t)(len - 2) > bang[i].len && 712 memcmp(bang[i].name, line + 2, 713 bang[i].len) == 0) 714 break; 715 } 716 if (bang[i].name == NULL) { 717 file_error(ms, 0, 718 "Unknown !: entry `%s'", line); 719 (*errs)++; 720 continue; 721 } 722 if (*marraycount == 0) { 723 file_error(ms, 0, 724 "No current entry for :!%s type", 725 bang[i].name); 726 (*errs)++; 727 continue; 728 } 729 if ((*bang[i].fun)(ms, 730 &(*marray)[*marraycount - 1], 731 line + bang[i].len + 2) != 0) { 732 (*errs)++; 733 continue; 734 } 735 continue; 736 } 737 /*FALLTHROUGH*/ 738 default: 739 if (parse(ms, marray, marraycount, line, lineno, 740 action) != 0) 741 (*errs)++; 742 break; 743 } 744 } 745 free(line); 746 (void)fclose(f); 747} 748 749/* 750 * parse a file or directory of files 751 * const char *fn: name of magic file or directory 752 */ 753private int 
754cmpstrp(const void *p1, const void *p2) 755{ 756 return strcmp(*(char *const *)p1, *(char *const *)p2); 757} 758 759private int 760apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, 761 const char *fn, int action) 762{ 763 int errs = 0; 764 struct magic_entry *marray; 765 uint32_t marraycount, i, mentrycount = 0, starttest; 766 size_t slen, files = 0, maxfiles = 0; 767 char **filearr = NULL, *mfn; 768 struct stat st; 769 DIR *dir; 770 struct dirent *d; 771 772 ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ 773 774 maxmagic = MAXMAGIS; 775 if ((marray = CAST(struct magic_entry *, calloc(maxmagic, 776 sizeof(*marray)))) == NULL) { 777 file_oomem(ms, maxmagic * sizeof(*marray)); 778 return -1; 779 } 780 marraycount = 0; 781 782 /* print silly verbose header for USG compat. */ 783 if (action == FILE_CHECK) 784 (void)fprintf(stderr, "%s\n", usg_hdr); 785 786 /* load directory or file */ 787 if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { 788 dir = opendir(fn); 789 if (!dir) { 790 errs++; 791 goto out; 792 } 793 while ((d = readdir(dir)) != NULL) { 794 if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) { 795 file_oomem(ms, 796 strlen(fn) + strlen(d->d_name) + 2); 797 errs++; 798 closedir(dir); 799 goto out; 800 } 801 if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) { 802 free(mfn); 803 continue; 804 } 805 if (files >= maxfiles) { 806 size_t mlen; 807 maxfiles = (maxfiles + 1) * 2; 808 mlen = maxfiles * sizeof(*filearr); 809 if ((filearr = CAST(char **, 810 realloc(filearr, mlen))) == NULL) { 811 file_oomem(ms, mlen); 812 free(mfn); 813 closedir(dir); 814 errs++; 815 goto out; 816 } 817 } 818 filearr[files++] = mfn; 819 } 820 closedir(dir); 821 qsort(filearr, files, sizeof(*filearr), cmpstrp); 822 for (i = 0; i < files; i++) { 823 load_1(ms, action, filearr[i], &errs, &marray, 824 &marraycount); 825 free(filearr[i]); 826 } 827 free(filearr); 828 } else 829 load_1(ms, action, fn, &errs, &marray, &marraycount); 830 if (errs) 
831 goto out; 832 833 /* Set types of tests */ 834 for (i = 0; i < marraycount; ) { 835 if (marray[i].mp->cont_level != 0) { 836 i++; 837 continue; 838 } 839 840 starttest = i; 841 do { 842 static const char text[] = "text"; 843 static const char binary[] = "binary"; 844 static const size_t len = sizeof(text); 845 set_test_type(marray[starttest].mp, marray[i].mp); 846 if ((ms->flags & MAGIC_DEBUG) == 0) 847 continue; 848 (void)fprintf(stderr, "%s%s%s: %s\n", 849 marray[i].mp->mimetype, 850 marray[i].mp->mimetype[0] == '\0' ? "" : "; ", 851 marray[i].mp->desc[0] ? marray[i].mp->desc : 852 "(no description)", 853 marray[i].mp->flag & BINTEST ? binary : text); 854 if (marray[i].mp->flag & BINTEST) { 855 char *p = strstr(marray[i].mp->desc, text); 856 if (p && (p == marray[i].mp->desc || 857 isspace((unsigned char)p[-1])) && 858 (p + len - marray[i].mp->desc == 859 MAXstring || (p[len] == '\0' || 860 isspace((unsigned char)p[len])))) 861 (void)fprintf(stderr, "*** Possible " 862 "binary test for text type\n"); 863 } 864 } while (++i < marraycount && marray[i].mp->cont_level != 0); 865 } 866 867 qsort(marray, marraycount, sizeof(*marray), apprentice_sort); 868 869 /* 870 * Make sure that any level 0 "default" line is last (if one exists). 871 */ 872 for (i = 0; i < marraycount; i++) { 873 if (marray[i].mp->cont_level == 0 && 874 marray[i].mp->type == FILE_DEFAULT) { 875 while (++i < marraycount) 876 if (marray[i].mp->cont_level == 0) 877 break; 878 if (i != marraycount) { 879 /* XXX - Ugh! 
*/ 880 ms->line = marray[i].mp->lineno; 881 file_magwarn(ms, 882 "level 0 \"default\" did not sort last"); 883 } 884 break; 885 } 886 } 887 888 for (i = 0; i < marraycount; i++) 889 mentrycount += marray[i].cont_count; 890 891 slen = sizeof(**magicp) * mentrycount; 892 if ((*magicp = CAST(struct magic *, malloc(slen))) == NULL) { 893 file_oomem(ms, slen); 894 errs++; 895 goto out; 896 } 897 898 mentrycount = 0; 899 for (i = 0; i < marraycount; i++) { 900 (void)memcpy(*magicp + mentrycount, marray[i].mp, 901 marray[i].cont_count * sizeof(**magicp)); 902 mentrycount += marray[i].cont_count; 903 } 904out: 905 for (i = 0; i < marraycount; i++) 906 free(marray[i].mp); 907 free(marray); 908 if (errs) { 909 *magicp = NULL; 910 *nmagicp = 0; 911 return errs; 912 } else { 913 *nmagicp = mentrycount; 914 return 0; 915 } 916 917} 918 919/* 920 * extend the sign bit if the comparison is to be signed 921 */ 922protected uint64_t 923file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) 924{ 925 if (!(m->flag & UNSIGNED)) { 926 switch(m->type) { 927 /* 928 * Do not remove the casts below. They are 929 * vital. When later compared with the data, 930 * the sign extension must have happened. 
931 */ 932 case FILE_BYTE: 933 v = (char) v; 934 break; 935 case FILE_SHORT: 936 case FILE_BESHORT: 937 case FILE_LESHORT: 938 v = (short) v; 939 break; 940 case FILE_DATE: 941 case FILE_BEDATE: 942 case FILE_LEDATE: 943 case FILE_MEDATE: 944 case FILE_LDATE: 945 case FILE_BELDATE: 946 case FILE_LELDATE: 947 case FILE_MELDATE: 948 case FILE_LONG: 949 case FILE_BELONG: 950 case FILE_LELONG: 951 case FILE_MELONG: 952 case FILE_FLOAT: 953 case FILE_BEFLOAT: 954 case FILE_LEFLOAT: 955 v = (int32_t) v; 956 break; 957 case FILE_QUAD: 958 case FILE_BEQUAD: 959 case FILE_LEQUAD: 960 case FILE_QDATE: 961 case FILE_QLDATE: 962 case FILE_BEQDATE: 963 case FILE_BEQLDATE: 964 case FILE_LEQDATE: 965 case FILE_LEQLDATE: 966 case FILE_DOUBLE: 967 case FILE_BEDOUBLE: 968 case FILE_LEDOUBLE: 969 v = (int64_t) v; 970 break; 971 case FILE_STRING: 972 case FILE_PSTRING: 973 case FILE_BESTRING16: 974 case FILE_LESTRING16: 975 case FILE_REGEX: 976 case FILE_SEARCH: 977 case FILE_DEFAULT: 978 case FILE_INDIRECT: 979 break; 980 default: 981 if (ms->flags & MAGIC_CHECK) 982 file_magwarn(ms, "cannot happen: m->type=%d\n", 983 m->type); 984 return ~0U; 985 } 986 } 987 return v; 988} 989 990private int 991string_modifier_check(struct magic_set *ms, struct magic *m) 992{ 993 if ((ms->flags & MAGIC_CHECK) == 0) 994 return 0; 995 996 if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) { 997 file_magwarn(ms, 998 "'/BHhLl' modifiers are only allowed for pascal strings\n"); 999 return -1; 1000 } 1001 switch (m->type) { 1002 case FILE_BESTRING16: 1003 case FILE_LESTRING16: 1004 if (m->str_flags != 0) { 1005 file_magwarn(ms, 1006 "no modifiers allowed for 16-bit strings\n"); 1007 return -1; 1008 } 1009 break; 1010 case FILE_STRING: 1011 case FILE_PSTRING: 1012 if ((m->str_flags & REGEX_OFFSET_START) != 0) { 1013 file_magwarn(ms, 1014 "'/%c' only allowed on regex and search\n", 1015 CHAR_REGEX_OFFSET_START); 1016 return -1; 1017 } 1018 break; 1019 case FILE_SEARCH: 1020 if (m->str_range 
== 0) { 1021 file_magwarn(ms, 1022 "missing range; defaulting to %d\n", 1023 STRING_DEFAULT_RANGE); 1024 m->str_range = STRING_DEFAULT_RANGE; 1025 return -1; 1026 } 1027 break; 1028 case FILE_REGEX: 1029 if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) { 1030 file_magwarn(ms, "'/%c' not allowed on regex\n", 1031 CHAR_COMPACT_WHITESPACE); 1032 return -1; 1033 } 1034 if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) { 1035 file_magwarn(ms, "'/%c' not allowed on regex\n", 1036 CHAR_COMPACT_OPTIONAL_WHITESPACE); 1037 return -1; 1038 } 1039 break; 1040 default: 1041 file_magwarn(ms, "coding error: m->type=%d\n", 1042 m->type); 1043 return -1; 1044 } 1045 return 0; 1046} 1047 1048private int 1049get_op(char c) 1050{ 1051 switch (c) { 1052 case '&': 1053 return FILE_OPAND; 1054 case '|': 1055 return FILE_OPOR; 1056 case '^': 1057 return FILE_OPXOR; 1058 case '+': 1059 return FILE_OPADD; 1060 case '-': 1061 return FILE_OPMINUS; 1062 case '*': 1063 return FILE_OPMULTIPLY; 1064 case '/': 1065 return FILE_OPDIVIDE; 1066 case '%': 1067 return FILE_OPMODULO; 1068 default: 1069 return -1; 1070 } 1071} 1072 1073#ifdef ENABLE_CONDITIONALS 1074private int 1075get_cond(const char *l, const char **t) 1076{ 1077 static const struct cond_tbl_s { 1078 char name[8]; 1079 size_t len; 1080 int cond; 1081 } cond_tbl[] = { 1082 { "if", 2, COND_IF }, 1083 { "elif", 4, COND_ELIF }, 1084 { "else", 4, COND_ELSE }, 1085 { "", 0, COND_NONE }, 1086 }; 1087 const struct cond_tbl_s *p; 1088 1089 for (p = cond_tbl; p->len; p++) { 1090 if (strncmp(l, p->name, p->len) == 0 && 1091 isspace((unsigned char)l[p->len])) { 1092 if (t) 1093 *t = l + p->len; 1094 break; 1095 } 1096 } 1097 return p->cond; 1098} 1099 1100private int 1101check_cond(struct magic_set *ms, int cond, uint32_t cont_level) 1102{ 1103 int last_cond; 1104 last_cond = ms->c.li[cont_level].last_cond; 1105 1106 switch (cond) { 1107 case COND_IF: 1108 if (last_cond != COND_NONE && last_cond != COND_ELIF) { 1109 if (ms->flags & 
MAGIC_CHECK) 1110 file_magwarn(ms, "syntax error: `if'"); 1111 return -1; 1112 } 1113 last_cond = COND_IF; 1114 break; 1115 1116 case COND_ELIF: 1117 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1118 if (ms->flags & MAGIC_CHECK) 1119 file_magwarn(ms, "syntax error: `elif'"); 1120 return -1; 1121 } 1122 last_cond = COND_ELIF; 1123 break; 1124 1125 case COND_ELSE: 1126 if (last_cond != COND_IF && last_cond != COND_ELIF) { 1127 if (ms->flags & MAGIC_CHECK) 1128 file_magwarn(ms, "syntax error: `else'"); 1129 return -1; 1130 } 1131 last_cond = COND_NONE; 1132 break; 1133 1134 case COND_NONE: 1135 last_cond = COND_NONE; 1136 break; 1137 } 1138 1139 ms->c.li[cont_level].last_cond = last_cond; 1140 return 0; 1141} 1142#endif /* ENABLE_CONDITIONALS */ 1143 1144/* 1145 * parse one line from magic file, put into magic[index++] if valid 1146 */ 1147private int 1148parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, 1149 const char *line, size_t lineno, int action) 1150{ 1151#ifdef ENABLE_CONDITIONALS 1152 static uint32_t last_cont_level = 0; 1153#endif 1154 size_t i; 1155 struct magic_entry *me; 1156 struct magic *m; 1157 const char *l = line; 1158 char *t; 1159 int op; 1160 uint32_t cont_level; 1161 1162 cont_level = 0; 1163 1164 while (*l == '>') { 1165 ++l; /* step over */ 1166 cont_level++; 1167 } 1168#ifdef ENABLE_CONDITIONALS 1169 if (cont_level == 0 || cont_level > last_cont_level) 1170 if (file_check_mem(ms, cont_level) == -1) 1171 return -1; 1172 last_cont_level = cont_level; 1173#endif 1174 1175#define ALLOC_CHUNK (size_t)10 1176#define ALLOC_INCR (size_t)200 1177 1178 if (cont_level != 0) { 1179 if (*nmentryp == 0) { 1180 file_error(ms, 0, "No current entry for continuation"); 1181 return -1; 1182 } 1183 me = &(*mentryp)[*nmentryp - 1]; 1184 if (me->cont_count == me->max_count) { 1185 struct magic *nm; 1186 size_t cnt = me->max_count + ALLOC_CHUNK; 1187 if ((nm = CAST(struct magic *, realloc(me->mp, 1188 sizeof(*nm) * cnt))) == 
NULL) { 1189 file_oomem(ms, sizeof(*nm) * cnt); 1190 return -1; 1191 } 1192 me->mp = m = nm; 1193 me->max_count = CAST(uint32_t, cnt); 1194 } 1195 m = &me->mp[me->cont_count++]; 1196 (void)memset(m, 0, sizeof(*m)); 1197 m->cont_level = cont_level; 1198 } else { 1199 if (*nmentryp == maxmagic) { 1200 struct magic_entry *mp; 1201 1202 maxmagic += ALLOC_INCR; 1203 if ((mp = CAST(struct magic_entry *, 1204 realloc(*mentryp, sizeof(*mp) * maxmagic))) == 1205 NULL) { 1206 file_oomem(ms, sizeof(*mp) * maxmagic); 1207 return -1; 1208 } 1209 (void)memset(&mp[*nmentryp], 0, sizeof(*mp) * 1210 ALLOC_INCR); 1211 *mentryp = mp; 1212 } 1213 me = &(*mentryp)[*nmentryp]; 1214 if (me->mp == NULL) { 1215 size_t len = sizeof(*m) * ALLOC_CHUNK; 1216 if ((m = CAST(struct magic *, malloc(len))) == NULL) { 1217 file_oomem(ms, len); 1218 return -1; 1219 } 1220 me->mp = m; 1221 me->max_count = ALLOC_CHUNK; 1222 } else 1223 m = me->mp; 1224 (void)memset(m, 0, sizeof(*m)); 1225 m->factor_op = FILE_FACTOR_OP_NONE; 1226 m->cont_level = 0; 1227 me->cont_count = 1; 1228 } 1229 m->lineno = CAST(uint32_t, lineno); 1230 1231 if (*l == '&') { /* m->cont_level == 0 checked below. */ 1232 ++l; /* step over */ 1233 m->flag |= OFFADD; 1234 } 1235 if (*l == '(') { 1236 ++l; /* step over */ 1237 m->flag |= INDIR; 1238 if (m->flag & OFFADD) 1239 m->flag = (m->flag & ~OFFADD) | INDIROFFADD; 1240 1241 if (*l == '&') { /* m->cont_level == 0 checked below */ 1242 ++l; /* step over */ 1243 m->flag |= OFFADD; 1244 } 1245 } 1246 /* Indirect offsets are not valid at level 0. 
*/ 1247 if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) 1248 if (ms->flags & MAGIC_CHECK) 1249 file_magwarn(ms, "relative offset at level 0"); 1250 1251 /* get offset, then skip over it */ 1252 m->offset = (uint32_t)strtoul(l, &t, 0); 1253 if (l == t) 1254 if (ms->flags & MAGIC_CHECK) 1255 file_magwarn(ms, "offset `%s' invalid", l); 1256 l = t; 1257 1258 if (m->flag & INDIR) { 1259 m->in_type = FILE_LONG; 1260 m->in_offset = 0; 1261 /* 1262 * read [.lbs][+-]nnnnn) 1263 */ 1264 if (*l == '.') { 1265 l++; 1266 switch (*l) { 1267 case 'l': 1268 m->in_type = FILE_LELONG; 1269 break; 1270 case 'L': 1271 m->in_type = FILE_BELONG; 1272 break; 1273 case 'm': 1274 m->in_type = FILE_MELONG; 1275 break; 1276 case 'h': 1277 case 's': 1278 m->in_type = FILE_LESHORT; 1279 break; 1280 case 'H': 1281 case 'S': 1282 m->in_type = FILE_BESHORT; 1283 break; 1284 case 'c': 1285 case 'b': 1286 case 'C': 1287 case 'B': 1288 m->in_type = FILE_BYTE; 1289 break; 1290 case 'e': 1291 case 'f': 1292 case 'g': 1293 m->in_type = FILE_LEDOUBLE; 1294 break; 1295 case 'E': 1296 case 'F': 1297 case 'G': 1298 m->in_type = FILE_BEDOUBLE; 1299 break; 1300 case 'i': 1301 m->in_type = FILE_LEID3; 1302 break; 1303 case 'I': 1304 m->in_type = FILE_BEID3; 1305 break; 1306 default: 1307 if (ms->flags & MAGIC_CHECK) 1308 file_magwarn(ms, 1309 "indirect offset type `%c' invalid", 1310 *l); 1311 break; 1312 } 1313 l++; 1314 } 1315 1316 m->in_op = 0; 1317 if (*l == '~') { 1318 m->in_op |= FILE_OPINVERSE; 1319 l++; 1320 } 1321 if ((op = get_op(*l)) != -1) { 1322 m->in_op |= op; 1323 l++; 1324 } 1325 if (*l == '(') { 1326 m->in_op |= FILE_OPINDIRECT; 1327 l++; 1328 } 1329 if (isdigit((unsigned char)*l) || *l == '-') { 1330 m->in_offset = (int32_t)strtol(l, &t, 0); 1331 if (l == t) 1332 if (ms->flags & MAGIC_CHECK) 1333 file_magwarn(ms, 1334 "in_offset `%s' invalid", l); 1335 l = t; 1336 } 1337 if (*l++ != ')' || 1338 ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) 1339 if (ms->flags & MAGIC_CHECK) 
1340 file_magwarn(ms, 1341 "missing ')' in indirect offset"); 1342 } 1343 EATAB; 1344 1345#ifdef ENABLE_CONDITIONALS 1346 m->cond = get_cond(l, &l); 1347 if (check_cond(ms, m->cond, cont_level) == -1) 1348 return -1; 1349 1350 EATAB; 1351#endif 1352 1353 if (*l == 'u') { 1354 ++l; 1355 m->flag |= UNSIGNED; 1356 } 1357 1358 m->type = get_type(l, &l); 1359 if (m->type == FILE_INVALID) { 1360 if (ms->flags & MAGIC_CHECK) 1361 file_magwarn(ms, "type `%s' invalid", l); 1362 return -1; 1363 } 1364 1365 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ 1366 /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ 1367 1368 m->mask_op = 0; 1369 if (*l == '~') { 1370 if (!IS_STRING(m->type)) 1371 m->mask_op |= FILE_OPINVERSE; 1372 else if (ms->flags & MAGIC_CHECK) 1373 file_magwarn(ms, "'~' invalid for string types"); 1374 ++l; 1375 } 1376 m->str_range = 0; 1377 m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; 1378 if ((op = get_op(*l)) != -1) { 1379 if (!IS_STRING(m->type)) { 1380 uint64_t val; 1381 ++l; 1382 m->mask_op |= op; 1383 val = (uint64_t)strtoull(l, &t, 0); 1384 l = t; 1385 m->num_mask = file_signextend(ms, m, val); 1386 eatsize(&l); 1387 } 1388 else if (op == FILE_OPDIVIDE) { 1389 int have_range = 0; 1390 while (!isspace((unsigned char)*++l)) { 1391 switch (*l) { 1392 case '0': case '1': case '2': 1393 case '3': case '4': case '5': 1394 case '6': case '7': case '8': 1395 case '9': 1396 if (have_range && 1397 (ms->flags & MAGIC_CHECK)) 1398 file_magwarn(ms, 1399 "multiple ranges"); 1400 have_range = 1; 1401 m->str_range = CAST(uint32_t, 1402 strtoul(l, &t, 0)); 1403 if (m->str_range == 0) 1404 file_magwarn(ms, 1405 "zero range"); 1406 l = t - 1; 1407 break; 1408 case CHAR_COMPACT_WHITESPACE: 1409 m->str_flags |= 1410 STRING_COMPACT_WHITESPACE; 1411 break; 1412 case CHAR_COMPACT_OPTIONAL_WHITESPACE: 1413 m->str_flags |= 1414 STRING_COMPACT_OPTIONAL_WHITESPACE; 1415 break; 1416 case CHAR_IGNORE_LOWERCASE: 1417 m->str_flags |= 
STRING_IGNORE_LOWERCASE; 1418 break; 1419 case CHAR_IGNORE_UPPERCASE: 1420 m->str_flags |= STRING_IGNORE_UPPERCASE; 1421 break; 1422 case CHAR_REGEX_OFFSET_START: 1423 m->str_flags |= REGEX_OFFSET_START; 1424 break; 1425 case CHAR_BINTEST: 1426 m->str_flags |= STRING_BINTEST; 1427 break; 1428 case CHAR_TEXTTEST: 1429 m->str_flags |= STRING_TEXTTEST; 1430 break; 1431 case CHAR_PSTRING_1_LE: 1432 if (m->type != FILE_PSTRING) 1433 goto bad; 1434 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE; 1435 break; 1436 case CHAR_PSTRING_2_BE: 1437 if (m->type != FILE_PSTRING) 1438 goto bad; 1439 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE; 1440 break; 1441 case CHAR_PSTRING_2_LE: 1442 if (m->type != FILE_PSTRING) 1443 goto bad; 1444 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE; 1445 break; 1446 case CHAR_PSTRING_4_BE: 1447 if (m->type != FILE_PSTRING) 1448 goto bad; 1449 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE; 1450 break; 1451 case CHAR_PSTRING_4_LE: 1452 if (m->type != FILE_PSTRING) 1453 goto bad; 1454 m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE; 1455 break; 1456 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: 1457 if (m->type != FILE_PSTRING) 1458 goto bad; 1459 m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF; 1460 break; 1461 default: 1462 bad: 1463 if (ms->flags & MAGIC_CHECK) 1464 file_magwarn(ms, 1465 "string extension `%c' " 1466 "invalid", *l); 1467 return -1; 1468 } 1469 /* allow multiple '/' for readability */ 1470 if (l[1] == '/' && 1471 !isspace((unsigned char)l[2])) 1472 l++; 1473 } 1474 if (string_modifier_check(ms, m) == -1) 1475 return -1; 1476 } 1477 else { 1478 if (ms->flags & MAGIC_CHECK) 1479 file_magwarn(ms, "invalid string op: %c", *t); 1480 return -1; 1481 } 1482 } 1483 /* 1484 * We used to set mask to all 1's here, instead let's just not do 1485 * anything if mask = 0 (unless you have a better idea) 1486 */ 1487 EATAB; 1488 1489 switch (*l) { 1490 case '>': 1491 case '<': 1492 
m->reln = *l; 1493 ++l; 1494 if (*l == '=') { 1495 if (ms->flags & MAGIC_CHECK) { 1496 file_magwarn(ms, "%c= not supported", 1497 m->reln); 1498 return -1; 1499 } 1500 ++l; 1501 } 1502 break; 1503 /* Old-style anding: "0 byte &0x80 dynamically linked" */ 1504 case '&': 1505 case '^': 1506 case '=': 1507 m->reln = *l; 1508 ++l; 1509 if (*l == '=') { 1510 /* HP compat: ignore &= etc. */ 1511 ++l; 1512 } 1513 break; 1514 case '!': 1515 m->reln = *l; 1516 ++l; 1517 break; 1518 default: 1519 m->reln = '='; /* the default relation */ 1520 if (*l == 'x' && ((isascii((unsigned char)l[1]) && 1521 isspace((unsigned char)l[1])) || !l[1])) { 1522 m->reln = *l; 1523 ++l; 1524 } 1525 break; 1526 } 1527 /* 1528 * Grab the value part, except for an 'x' reln. 1529 */ 1530 if (m->reln != 'x' && getvalue(ms, m, &l, action)) 1531 return -1; 1532 1533 /* 1534 * TODO finish this macro and start using it! 1535 * #define offsetcheck {if (offset > HOWMANY-1) 1536 * magwarn("offset too big"); } 1537 */ 1538 1539 /* 1540 * Now get last part - the description 1541 */ 1542 EATAB; 1543 if (l[0] == '\b') { 1544 ++l; 1545 m->flag |= NOSPACE; 1546 } else if ((l[0] == '\\') && (l[1] == 'b')) { 1547 ++l; 1548 ++l; 1549 m->flag |= NOSPACE; 1550 } 1551 for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) 1552 continue; 1553 if (i == sizeof(m->desc)) { 1554 m->desc[sizeof(m->desc) - 1] = '\0'; 1555 if (ms->flags & MAGIC_CHECK) 1556 file_magwarn(ms, "description `%s' truncated", m->desc); 1557 } 1558 1559 /* 1560 * We only do this check while compiling, or if any of the magic 1561 * files were not compiled. 
1562 */ 1563 if (ms->flags & MAGIC_CHECK) { 1564 if (check_format(ms, m) == -1) 1565 return -1; 1566 } 1567#ifndef COMPILE_ONLY 1568 if (action == FILE_CHECK) { 1569 file_mdump(m); 1570 } 1571#endif 1572 m->mimetype[0] = '\0'; /* initialise MIME type to none */ 1573 if (m->cont_level == 0) 1574 ++(*nmentryp); /* make room for next */ 1575 return 0; 1576} 1577 1578/* 1579 * parse a STRENGTH annotation line from magic file, put into magic[index - 1] 1580 * if valid 1581 */ 1582private int 1583parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line) 1584{ 1585 const char *l = line; 1586 char *el; 1587 unsigned long factor; 1588 struct magic *m = &me->mp[0]; 1589 1590 if (m->factor_op != FILE_FACTOR_OP_NONE) { 1591 file_magwarn(ms, 1592 "Current entry already has a strength type: %c %d", 1593 m->factor_op, m->factor); 1594 return -1; 1595 } 1596 EATAB; 1597 switch (*l) { 1598 case FILE_FACTOR_OP_NONE: 1599 case FILE_FACTOR_OP_PLUS: 1600 case FILE_FACTOR_OP_MINUS: 1601 case FILE_FACTOR_OP_TIMES: 1602 case FILE_FACTOR_OP_DIV: 1603 m->factor_op = *l++; 1604 break; 1605 default: 1606 file_magwarn(ms, "Unknown factor op `%c'", *l); 1607 return -1; 1608 } 1609 EATAB; 1610 factor = strtoul(l, &el, 0); 1611 if (factor > 255) { 1612 file_magwarn(ms, "Too large factor `%lu'", factor); 1613 goto out; 1614 } 1615 if (*el && !isspace((unsigned char)*el)) { 1616 file_magwarn(ms, "Bad factor `%s'", l); 1617 goto out; 1618 } 1619 m->factor = (uint8_t)factor; 1620 if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) { 1621 file_magwarn(ms, "Cannot have factor op `%c' and factor %u", 1622 m->factor_op, m->factor); 1623 goto out; 1624 } 1625 return 0; 1626out: 1627 m->factor_op = FILE_FACTOR_OP_NONE; 1628 m->factor = 0; 1629 return -1; 1630} 1631 1632/* 1633 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into 1634 * magic[index - 1] 1635 */ 1636private int 1637parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line) 1638{ 
1639 size_t i; 1640 const char *l = line; 1641 struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; 1642 1643 if (m->apple[0] != '\0') { 1644 file_magwarn(ms, "Current entry already has a APPLE type " 1645 "`%.8s', new type `%s'", m->mimetype, l); 1646 return -1; 1647 } 1648 1649 EATAB; 1650 for (i = 0; *l && ((isascii((unsigned char)*l) && 1651 isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && 1652 i < sizeof(m->apple); m->apple[i++] = *l++) 1653 continue; 1654 if (i == sizeof(m->apple) && *l) { 1655 /* We don't need to NUL terminate here, printing handles it */ 1656 if (ms->flags & MAGIC_CHECK) 1657 file_magwarn(ms, "APPLE type `%s' truncated %" 1658 SIZE_T_FORMAT "u", line, i); 1659 } 1660 1661 if (i > 0) 1662 return 0; 1663 else 1664 return -1; 1665} 1666 1667/* 1668 * parse a MIME annotation line from magic file, put into magic[index - 1] 1669 * if valid 1670 */ 1671private int 1672parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line) 1673{ 1674 size_t i; 1675 const char *l = line; 1676 struct magic *m = &me->mp[me->cont_count == 0 ? 
0 : me->cont_count - 1]; 1677 1678 if (m->mimetype[0] != '\0') { 1679 file_magwarn(ms, "Current entry already has a MIME type `%s'," 1680 " new type `%s'", m->mimetype, l); 1681 return -1; 1682 } 1683 1684 EATAB; 1685 for (i = 0; *l && ((isascii((unsigned char)*l) && 1686 isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && 1687 i < sizeof(m->mimetype); m->mimetype[i++] = *l++) 1688 continue; 1689 if (i == sizeof(m->mimetype)) { 1690 m->mimetype[sizeof(m->mimetype) - 1] = '\0'; 1691 if (ms->flags & MAGIC_CHECK) 1692 file_magwarn(ms, "MIME type `%s' truncated %" 1693 SIZE_T_FORMAT "u", m->mimetype, i); 1694 } else 1695 m->mimetype[i] = '\0'; 1696 1697 if (i > 0) 1698 return 0; 1699 else 1700 return -1; 1701} 1702 1703private int 1704check_format_type(const char *ptr, int type) 1705{ 1706 int quad = 0; 1707 if (*ptr == '\0') { 1708 /* Missing format string; bad */ 1709 return -1; 1710 } 1711 1712 switch (type) { 1713 case FILE_FMT_QUAD: 1714 quad = 1; 1715 /*FALLTHROUGH*/ 1716 case FILE_FMT_NUM: 1717 if (*ptr == '-') 1718 ptr++; 1719 if (*ptr == '.') 1720 ptr++; 1721 while (isdigit((unsigned char)*ptr)) ptr++; 1722 if (*ptr == '.') 1723 ptr++; 1724 while (isdigit((unsigned char)*ptr)) ptr++; 1725 if (quad) { 1726 if (*ptr++ != 'l') 1727 return -1; 1728 if (*ptr++ != 'l') 1729 return -1; 1730 } 1731 1732 switch (*ptr++) { 1733 case 'l': 1734 switch (*ptr++) { 1735 case 'i': 1736 case 'd': 1737 case 'u': 1738 case 'x': 1739 case 'X': 1740 return 0; 1741 default: 1742 return -1; 1743 } 1744 1745 case 'h': 1746 switch (*ptr++) { 1747 case 'h': 1748 switch (*ptr++) { 1749 case 'i': 1750 case 'd': 1751 case 'u': 1752 case 'x': 1753 case 'X': 1754 return 0; 1755 default: 1756 return -1; 1757 } 1758 case 'd': 1759 return 0; 1760 default: 1761 return -1; 1762 } 1763 1764 case 'i': 1765 case 'c': 1766 case 'd': 1767 case 'u': 1768 case 'x': 1769 case 'X': 1770 return 0; 1771 1772 default: 1773 return -1; 1774 } 1775 1776 case FILE_FMT_FLOAT: 1777 case FILE_FMT_DOUBLE: 1778 if 
(*ptr == '-') 1779 ptr++; 1780 if (*ptr == '.') 1781 ptr++; 1782 while (isdigit((unsigned char)*ptr)) ptr++; 1783 if (*ptr == '.') 1784 ptr++; 1785 while (isdigit((unsigned char)*ptr)) ptr++; 1786 1787 switch (*ptr++) { 1788 case 'e': 1789 case 'E': 1790 case 'f': 1791 case 'F': 1792 case 'g': 1793 case 'G': 1794 return 0; 1795 1796 default: 1797 return -1; 1798 } 1799 1800 1801 case FILE_FMT_STR: 1802 if (*ptr == '-') 1803 ptr++; 1804 while (isdigit((unsigned char )*ptr)) 1805 ptr++; 1806 if (*ptr == '.') { 1807 ptr++; 1808 while (isdigit((unsigned char )*ptr)) 1809 ptr++; 1810 } 1811 1812 switch (*ptr++) { 1813 case 's': 1814 return 0; 1815 default: 1816 return -1; 1817 } 1818 1819 default: 1820 /* internal error */ 1821 abort(); 1822 } 1823 /*NOTREACHED*/ 1824 return -1; 1825} 1826 1827/* 1828 * Check that the optional printf format in description matches 1829 * the type of the magic. 1830 */ 1831private int 1832check_format(struct magic_set *ms, struct magic *m) 1833{ 1834 char *ptr; 1835 1836 for (ptr = m->desc; *ptr; ptr++) 1837 if (*ptr == '%') 1838 break; 1839 if (*ptr == '\0') { 1840 /* No format string; ok */ 1841 return 1; 1842 } 1843 1844 assert(file_nformats == file_nnames); 1845 1846 if (m->type >= file_nformats) { 1847 file_magwarn(ms, "Internal error inconsistency between " 1848 "m->type and format strings"); 1849 return -1; 1850 } 1851 if (file_formats[m->type] == FILE_FMT_NONE) { 1852 file_magwarn(ms, "No format string for `%s' with description " 1853 "`%s'", m->desc, file_names[m->type]); 1854 return -1; 1855 } 1856 1857 ptr++; 1858 if (check_format_type(ptr, file_formats[m->type]) == -1) { 1859 /* 1860 * TODO: this error message is unhelpful if the format 1861 * string is not one character long 1862 */ 1863 file_magwarn(ms, "Printf format `%c' is not valid for type " 1864 "`%s' in description `%s'", *ptr ? 
*ptr : '?', 1865 file_names[m->type], m->desc); 1866 return -1; 1867 } 1868 1869 for (; *ptr; ptr++) { 1870 if (*ptr == '%') { 1871 file_magwarn(ms, 1872 "Too many format strings (should have at most one) " 1873 "for `%s' with description `%s'", 1874 file_names[m->type], m->desc); 1875 return -1; 1876 } 1877 } 1878 return 0; 1879} 1880 1881/* 1882 * Read a numeric value from a pointer, into the value union of a magic 1883 * pointer, according to the magic type. Update the string pointer to point 1884 * just after the number read. Return 0 for success, non-zero for failure. 1885 */ 1886private int 1887getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) 1888{ 1889 switch (m->type) { 1890 case FILE_BESTRING16: 1891 case FILE_LESTRING16: 1892 case FILE_STRING: 1893 case FILE_PSTRING: 1894 case FILE_REGEX: 1895 case FILE_SEARCH: 1896 *p = getstr(ms, m, *p, action == FILE_COMPILE); 1897 if (*p == NULL) { 1898 if (ms->flags & MAGIC_CHECK) 1899 file_magwarn(ms, "cannot get string from `%s'", 1900 m->value.s); 1901 return -1; 1902 } 1903 return 0; 1904 case FILE_FLOAT: 1905 case FILE_BEFLOAT: 1906 case FILE_LEFLOAT: 1907 if (m->reln != 'x') { 1908 char *ep; 1909#ifdef HAVE_STRTOF 1910 m->value.f = strtof(*p, &ep); 1911#else 1912 m->value.f = (float)strtod(*p, &ep); 1913#endif 1914 *p = ep; 1915 } 1916 return 0; 1917 case FILE_DOUBLE: 1918 case FILE_BEDOUBLE: 1919 case FILE_LEDOUBLE: 1920 if (m->reln != 'x') { 1921 char *ep; 1922 m->value.d = strtod(*p, &ep); 1923 *p = ep; 1924 } 1925 return 0; 1926 default: 1927 if (m->reln != 'x') { 1928 char *ep; 1929 m->value.q = file_signextend(ms, m, 1930 (uint64_t)strtoull(*p, &ep, 0)); 1931 *p = ep; 1932 eatsize(p); 1933 } 1934 return 0; 1935 } 1936} 1937 1938/* 1939 * Convert a string containing C character escapes. Stop at an unescaped 1940 * space or tab. 1941 * Copy the converted version to "m->value.s", and the length in m->vallen. 1942 * Return updated scan pointer as function result. Warn if set. 
1943 */ 1944private const char * 1945getstr(struct magic_set *ms, struct magic *m, const char *s, int warn) 1946{ 1947 const char *origs = s; 1948 char *p = m->value.s; 1949 size_t plen = sizeof(m->value.s); 1950 char *origp = p; 1951 char *pmax = p + plen - 1; 1952 int c; 1953 int val; 1954 1955 while ((c = *s++) != '\0') { 1956 if (isspace((unsigned char) c)) 1957 break; 1958 if (p >= pmax) { 1959 file_error(ms, 0, "string too long: `%s'", origs); 1960 return NULL; 1961 } 1962 if (c == '\\') { 1963 switch(c = *s++) { 1964 1965 case '\0': 1966 if (warn) 1967 file_magwarn(ms, "incomplete escape"); 1968 goto out; 1969 1970 case '\t': 1971 if (warn) { 1972 file_magwarn(ms, 1973 "escaped tab found, use \\t instead"); 1974 warn = 0; /* already did */ 1975 } 1976 /*FALLTHROUGH*/ 1977 default: 1978 if (warn) { 1979 if (isprint((unsigned char)c)) { 1980 /* Allow escaping of 1981 * ``relations'' */ 1982 if (strchr("<>&^=!", c) == NULL 1983 && (m->type != FILE_REGEX || 1984 strchr("[]().*?^$|{}", c) 1985 == NULL)) { 1986 file_magwarn(ms, "no " 1987 "need to escape " 1988 "`%c'", c); 1989 } 1990 } else { 1991 file_magwarn(ms, 1992 "unknown escape sequence: " 1993 "\\%03o", c); 1994 } 1995 } 1996 /*FALLTHROUGH*/ 1997 /* space, perhaps force people to use \040? 
*/ 1998 case ' ': 1999#if 0 2000 /* 2001 * Other things people escape, but shouldn't need to, 2002 * so we disallow them 2003 */ 2004 case '\'': 2005 case '"': 2006 case '?': 2007#endif 2008 /* Relations */ 2009 case '>': 2010 case '<': 2011 case '&': 2012 case '^': 2013 case '=': 2014 case '!': 2015 /* and baskslash itself */ 2016 case '\\': 2017 *p++ = (char) c; 2018 break; 2019 2020 case 'a': 2021 *p++ = '\a'; 2022 break; 2023 2024 case 'b': 2025 *p++ = '\b'; 2026 break; 2027 2028 case 'f': 2029 *p++ = '\f'; 2030 break; 2031 2032 case 'n': 2033 *p++ = '\n'; 2034 break; 2035 2036 case 'r': 2037 *p++ = '\r'; 2038 break; 2039 2040 case 't': 2041 *p++ = '\t'; 2042 break; 2043 2044 case 'v': 2045 *p++ = '\v'; 2046 break; 2047 2048 /* \ and up to 3 octal digits */ 2049 case '0': 2050 case '1': 2051 case '2': 2052 case '3': 2053 case '4': 2054 case '5': 2055 case '6': 2056 case '7': 2057 val = c - '0'; 2058 c = *s++; /* try for 2 */ 2059 if (c >= '0' && c <= '7') { 2060 val = (val << 3) | (c - '0'); 2061 c = *s++; /* try for 3 */ 2062 if (c >= '0' && c <= '7') 2063 val = (val << 3) | (c-'0'); 2064 else 2065 --s; 2066 } 2067 else 2068 --s; 2069 *p++ = (char)val; 2070 break; 2071 2072 /* \x and up to 2 hex digits */ 2073 case 'x': 2074 val = 'x'; /* Default if no digits */ 2075 c = hextoint(*s++); /* Get next char */ 2076 if (c >= 0) { 2077 val = c; 2078 c = hextoint(*s++); 2079 if (c >= 0) 2080 val = (val << 4) + c; 2081 else 2082 --s; 2083 } else 2084 --s; 2085 *p++ = (char)val; 2086 break; 2087 } 2088 } else 2089 *p++ = (char)c; 2090 } 2091out: 2092 *p = '\0'; 2093 m->vallen = CAST(unsigned char, (p - origp)); 2094 if (m->type == FILE_PSTRING) 2095 m->vallen += (unsigned char)file_pstring_length_size(m); 2096 return s; 2097} 2098 2099 2100/* Single hex char to int; -1 if not a hex char. 
*/ 2101private int 2102hextoint(int c) 2103{ 2104 if (!isascii((unsigned char) c)) 2105 return -1; 2106 if (isdigit((unsigned char) c)) 2107 return c - '0'; 2108 if ((c >= 'a') && (c <= 'f')) 2109 return c + 10 - 'a'; 2110 if (( c>= 'A') && (c <= 'F')) 2111 return c + 10 - 'A'; 2112 return -1; 2113} 2114 2115 2116/* 2117 * Print a string containing C character escapes. 2118 */ 2119protected void 2120file_showstr(FILE *fp, const char *s, size_t len) 2121{ 2122 char c; 2123 2124 for (;;) { 2125 if (len == ~0U) { 2126 c = *s++; 2127 if (c == '\0') 2128 break; 2129 } 2130 else { 2131 if (len-- == 0) 2132 break; 2133 c = *s++; 2134 } 2135 if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ 2136 (void) fputc(c, fp); 2137 else { 2138 (void) fputc('\\', fp); 2139 switch (c) { 2140 case '\a': 2141 (void) fputc('a', fp); 2142 break; 2143 2144 case '\b': 2145 (void) fputc('b', fp); 2146 break; 2147 2148 case '\f': 2149 (void) fputc('f', fp); 2150 break; 2151 2152 case '\n': 2153 (void) fputc('n', fp); 2154 break; 2155 2156 case '\r': 2157 (void) fputc('r', fp); 2158 break; 2159 2160 case '\t': 2161 (void) fputc('t', fp); 2162 break; 2163 2164 case '\v': 2165 (void) fputc('v', fp); 2166 break; 2167 2168 default: 2169 (void) fprintf(fp, "%.3o", c & 0377); 2170 break; 2171 } 2172 } 2173 } 2174} 2175 2176/* 2177 * eatsize(): Eat the size spec from a number [eg. 10UL] 2178 */ 2179private void 2180eatsize(const char **p) 2181{ 2182 const char *l = *p; 2183 2184 if (LOWCASE(*l) == 'u') 2185 l++; 2186 2187 switch (LOWCASE(*l)) { 2188 case 'l': /* long */ 2189 case 's': /* short */ 2190 case 'h': /* short */ 2191 case 'b': /* char/byte */ 2192 case 'c': /* char/byte */ 2193 l++; 2194 /*FALLTHROUGH*/ 2195 default: 2196 break; 2197 } 2198 2199 *p = l; 2200} 2201 2202/* 2203 * handle a compiled file. 
2204 */ 2205private int 2206apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, 2207 const char *fn) 2208{ 2209 int fd; 2210 struct stat st; 2211 uint32_t *ptr; 2212 uint32_t version; 2213 int needsbyteswap; 2214 char *dbname = NULL; 2215 void *mm = NULL; 2216 2217 dbname = mkdbname(ms, fn, 0); 2218 if (dbname == NULL) 2219 goto error2; 2220 2221 if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) 2222 goto error2; 2223 2224 if (fstat(fd, &st) == -1) { 2225 file_error(ms, errno, "cannot stat `%s'", dbname); 2226 goto error1; 2227 } 2228 if (st.st_size < 8) { 2229 file_error(ms, 0, "file `%s' is too small", dbname); 2230 goto error1; 2231 } 2232 2233#ifdef QUICK 2234 if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE, 2235 MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) { 2236 file_error(ms, errno, "cannot map `%s'", dbname); 2237 goto error1; 2238 } 2239#define RET 2 2240#else 2241 if ((mm = CAST(void *, malloc((size_t)st.st_size))) == NULL) { 2242 file_oomem(ms, (size_t)st.st_size); 2243 goto error1; 2244 } 2245 if (read(fd, mm, (size_t)st.st_size) != (ssize_t)st.st_size) { 2246 file_badread(ms); 2247 goto error1; 2248 } 2249#define RET 1 2250#endif 2251 *magicp = CAST(struct magic *, mm); 2252 (void)close(fd); 2253 fd = -1; 2254 ptr = (uint32_t *)(void *)*magicp; 2255 if (*ptr != MAGICNO) { 2256 if (swap4(*ptr) != MAGICNO) { 2257 file_error(ms, 0, "bad magic in `%s'", dbname); 2258 goto error1; 2259 } 2260 needsbyteswap = 1; 2261 } else 2262 needsbyteswap = 0; 2263 if (needsbyteswap) 2264 version = swap4(ptr[1]); 2265 else 2266 version = ptr[1]; 2267 if (version != VERSIONNO) { 2268 file_error(ms, 0, "File %s supports only version %d magic " 2269 "files. 
`%s' is version %d", VERSION, 2270 VERSIONNO, dbname, version); 2271 goto error1; 2272 } 2273 *nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)); 2274 if (*nmagicp > 0) 2275 (*nmagicp)--; 2276 (*magicp)++; 2277 if (needsbyteswap) 2278 byteswap(*magicp, *nmagicp); 2279 free(dbname); 2280 return RET; 2281 2282error1: 2283 if (fd != -1) 2284 (void)close(fd); 2285 if (mm) { 2286#ifdef QUICK 2287 (void)munmap((void *)mm, (size_t)st.st_size); 2288#else 2289 free(mm); 2290#endif 2291 } else { 2292 *magicp = NULL; 2293 *nmagicp = 0; 2294 } 2295error2: 2296 free(dbname); 2297 return -1; 2298} 2299 2300private const uint32_t ar[] = { 2301 MAGICNO, VERSIONNO 2302}; 2303/* 2304 * handle an mmaped file. 2305 */ 2306private int 2307apprentice_compile(struct magic_set *ms, struct magic **magicp, 2308 uint32_t *nmagicp, const char *fn) 2309{ 2310 int fd = -1; 2311 char *dbname; 2312 int rv = -1; 2313 2314 dbname = mkdbname(ms, fn, 1); 2315 2316 if (dbname == NULL) 2317 goto out; 2318 2319 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) { 2320 file_error(ms, errno, "cannot open `%s'", dbname); 2321 goto out; 2322 } 2323 2324 if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) { 2325 file_error(ms, errno, "error writing `%s'", dbname); 2326 goto out; 2327 } 2328 2329 if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET) 2330 != sizeof(struct magic)) { 2331 file_error(ms, errno, "error seeking `%s'", dbname); 2332 goto out; 2333 } 2334 2335 if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp)) 2336 != (ssize_t)(sizeof(struct magic) * *nmagicp)) { 2337 file_error(ms, errno, "error writing `%s'", dbname); 2338 goto out; 2339 } 2340 2341 if (fd != -1) 2342 (void)close(fd); 2343 rv = 0; 2344out: 2345 free(dbname); 2346 return rv; 2347} 2348 2349private const char ext[] = ".mgc"; 2350/* 2351 * make a dbname 2352 */ 2353private char * 2354mkdbname(struct magic_set *ms, const char *fn, int strip) 2355{ 2356 const char *p, *q; 2357 char *buf; 2358 2359 if 
(strip) { 2360 if ((p = strrchr(fn, '/')) != NULL) 2361 fn = ++p; 2362 } 2363 2364 for (q = fn; *q; q++) 2365 continue; 2366 /* Look for .mgc */ 2367 for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--) 2368 if (*p != *q) 2369 break; 2370 2371 /* Did not find .mgc, restore q */ 2372 if (p >= ext) 2373 while (*q) 2374 q++; 2375 2376 q++; 2377 /* Compatibility with old code that looked in .mime */ 2378 if (ms->flags & MAGIC_MIME) { 2379 asprintf(&buf, "%.*s.mime%s", (int)(q - fn), fn, ext); 2380 if (access(buf, R_OK) != -1) { 2381 ms->flags &= MAGIC_MIME_TYPE; 2382 return buf; 2383 } 2384 free(buf); 2385 } 2386 asprintf(&buf, "%.*s%s", (int)(q - fn), fn, ext); 2387 2388 /* Compatibility with old code that looked in .mime */ 2389 if (strstr(p, ".mime") != NULL) 2390 ms->flags &= MAGIC_MIME_TYPE; 2391 return buf; 2392} 2393 2394/* 2395 * Byteswap an mmap'ed file if needed 2396 */ 2397private void 2398byteswap(struct magic *magic, uint32_t nmagic) 2399{ 2400 uint32_t i; 2401 for (i = 0; i < nmagic; i++) 2402 bs1(&magic[i]); 2403} 2404 2405/* 2406 * swap a short 2407 */ 2408private uint16_t 2409swap2(uint16_t sv) 2410{ 2411 uint16_t rv; 2412 uint8_t *s = (uint8_t *)(void *)&sv; 2413 uint8_t *d = (uint8_t *)(void *)&rv; 2414 d[0] = s[1]; 2415 d[1] = s[0]; 2416 return rv; 2417} 2418 2419/* 2420 * swap an int 2421 */ 2422private uint32_t 2423swap4(uint32_t sv) 2424{ 2425 uint32_t rv; 2426 uint8_t *s = (uint8_t *)(void *)&sv; 2427 uint8_t *d = (uint8_t *)(void *)&rv; 2428 d[0] = s[3]; 2429 d[1] = s[2]; 2430 d[2] = s[1]; 2431 d[3] = s[0]; 2432 return rv; 2433} 2434 2435/* 2436 * swap a quad 2437 */ 2438private uint64_t 2439swap8(uint64_t sv) 2440{ 2441 uint64_t rv; 2442 uint8_t *s = (uint8_t *)(void *)&sv; 2443 uint8_t *d = (uint8_t *)(void *)&rv; 2444#if 0 2445 d[0] = s[3]; 2446 d[1] = s[2]; 2447 d[2] = s[1]; 2448 d[3] = s[0]; 2449 d[4] = s[7]; 2450 d[5] = s[6]; 2451 d[6] = s[5]; 2452 d[7] = s[4]; 2453#else 2454 d[0] = s[7]; 2455 d[1] = s[6]; 2456 d[2] = s[5]; 2457 
d[3] = s[4]; 2458 d[4] = s[3]; 2459 d[5] = s[2]; 2460 d[6] = s[1]; 2461 d[7] = s[0]; 2462#endif 2463 return rv; 2464} 2465 2466/* 2467 * byteswap a single magic entry 2468 */ 2469private void 2470bs1(struct magic *m) 2471{ 2472 m->cont_level = swap2(m->cont_level); 2473 m->offset = swap4((uint32_t)m->offset); 2474 m->in_offset = swap4((uint32_t)m->in_offset); 2475 m->lineno = swap4((uint32_t)m->lineno); 2476 if (IS_STRING(m->type)) { 2477 m->str_range = swap4(m->str_range); 2478 m->str_flags = swap4(m->str_flags); 2479 } 2480 else { 2481 m->value.q = swap8(m->value.q); 2482 m->num_mask = swap8(m->num_mask); 2483 } 2484} 2485 2486protected size_t 2487file_pstring_length_size(const struct magic *m) 2488{ 2489 switch (m->str_flags & PSTRING_LEN) { 2490 case PSTRING_1_LE: 2491 return 1; 2492 case PSTRING_2_LE: 2493 case PSTRING_2_BE: 2494 return 2; 2495 case PSTRING_4_LE: 2496 case PSTRING_4_BE: 2497 return 4; 2498 default: 2499 abort(); /* Impossible */ 2500 return 1; 2501 } 2502} 2503protected size_t 2504file_pstring_get_length(const struct magic *m, const char *s) 2505{ 2506 size_t len = 0; 2507 2508 switch (m->str_flags & PSTRING_LEN) { 2509 case PSTRING_1_LE: 2510 len = *s; 2511 break; 2512 case PSTRING_2_LE: 2513 len = (s[1] << 8) | s[0]; 2514 break; 2515 case PSTRING_2_BE: 2516 len = (s[0] << 8) | s[1]; 2517 break; 2518 case PSTRING_4_LE: 2519 len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0]; 2520 break; 2521 case PSTRING_4_BE: 2522 len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3]; 2523 break; 2524 default: 2525 abort(); /* Impossible */ 2526 } 2527 2528 if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) 2529 len -= file_pstring_length_size(m); 2530 2531 return len; 2532} 2533