1/*********************************************************************** 2* * 3* This software is part of the ast package * 4* Copyright (c) 1985-2011 AT&T Intellectual Property * 5* and is licensed under the * 6* Common Public License, Version 1.0 * 7* by AT&T Intellectual Property * 8* * 9* A copy of the License is available at * 10* http://www.opensource.org/licenses/cpl1.0.txt * 11* (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12* * 13* Information and Software Systems Research * 14* AT&T Research * 15* Florham Park NJ * 16* * 17* Glenn Fowler <gsf@research.att.com> * 18* David Korn <dgk@research.att.com> * 19* Phong Vo <kpv@research.att.com> * 20* * 21***********************************************************************/ 22#pragma prototyped 23/* 24 * Glenn Fowler 25 * AT&T Research 26 * 27 * library interface to file 28 * 29 * the sum of the hacks {s5,v10,planix} is _____ than the parts 30 */ 31 32static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2011-01-28 $\0\n"; 33 34static const char lib[] = "libast:magic"; 35 36#include <ast.h> 37#include <ctype.h> 38#include <ccode.h> 39#include <dt.h> 40#include <modex.h> 41#include <error.h> 42#include <regex.h> 43#include <swap.h> 44 45#define T(m) (*m?ERROR_translate(NiL,NiL,lib,m):m) 46 47#define match(s,p) strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE) 48 49#define MAXNEST 10 /* { ... 
} nesting limit */ 50#define MINITEM 4 /* magic buffer rounding */ 51 52typedef struct /* identifier dictionary entry */ 53{ 54 const char name[16]; /* identifier name */ 55 int value; /* identifier value */ 56 Dtlink_t link; /* dictionary link */ 57} Info_t; 58 59typedef struct Edit /* edit substitution */ 60{ 61 struct Edit* next; /* next in list */ 62 regex_t* from; /* from pattern */ 63} Edit_t; 64 65struct Entry; 66 67typedef struct /* loop info */ 68{ 69 struct Entry* lab; /* call this function */ 70 int start; /* start here */ 71 int size; /* increment by this amount */ 72 int count; /* dynamic loop count */ 73 int offset; /* dynamic offset */ 74} Loop_t; 75 76typedef struct Entry /* magic file entry */ 77{ 78 struct Entry* next; /* next in list */ 79 char* expr; /* offset expression */ 80 union 81 { 82 unsigned long num; 83 char* str; 84 struct Entry* lab; 85 regex_t* sub; 86 Loop_t* loop; 87 } value; /* comparison value */ 88 char* desc; /* file description */ 89 char* mime; /* file mime type */ 90 unsigned long offset; /* offset in bytes */ 91 unsigned long mask; /* mask before compare */ 92 char cont; /* continuation operation */ 93 char type; /* datum type */ 94 char op; /* comparison operation */ 95 char nest; /* { or } nesting operation */ 96 char swap; /* forced swap order */ 97} Entry_t; 98 99#define CC_BIT 5 100 101#if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2) 102typedef unsigned short Cctype_t; 103#else 104typedef unsigned long Cctype_t; 105#endif 106 107#define CC_text 0x01 108#define CC_control 0x02 109#define CC_latin 0x04 110#define CC_binary 0x08 111#define CC_utf_8 0x10 112 113#define CC_notext CC_text /* CC_text is flipped before checking */ 114 115#define CC_MASK (CC_binary|CC_latin|CC_control|CC_text) 116 117#define CCTYPE(c) (((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text) 118 119#define ID_NONE 0 120#define ID_ASM 1 121#define ID_C 2 122#define ID_COBOL 3 123#define 
ID_COPYBOOK 4 124#define ID_CPLUSPLUS 5 125#define ID_FORTRAN 6 126#define ID_HTML 7 127#define ID_INCL1 8 128#define ID_INCL2 9 129#define ID_INCL3 10 130#define ID_MAM1 11 131#define ID_MAM2 12 132#define ID_MAM3 13 133#define ID_NOTEXT 14 134#define ID_PL1 15 135#define ID_YACC 16 136 137#define ID_MAX ID_YACC 138 139#define INFO_atime 1 140#define INFO_blocks 2 141#define INFO_ctime 3 142#define INFO_fstype 4 143#define INFO_gid 5 144#define INFO_mode 6 145#define INFO_mtime 7 146#define INFO_name 8 147#define INFO_nlink 9 148#define INFO_size 10 149#define INFO_uid 11 150 151#define _MAGIC_PRIVATE_ \ 152 Magicdisc_t* disc; /* discipline */ \ 153 Vmalloc_t* vm; /* vmalloc region */ \ 154 Entry_t* magic; /* parsed magic table */ \ 155 Entry_t* magiclast; /* last entry in magic */ \ 156 char* mime; /* MIME type */ \ 157 unsigned char* x2n; /* CC_ALIEN=>CC_NATIVE */ \ 158 char fbuf[SF_BUFSIZE + 1]; /* file data */ \ 159 char xbuf[SF_BUFSIZE + 1]; /* indirect file data */ \ 160 char nbuf[256]; /* !CC_NATIVE data */ \ 161 char mbuf[64]; /* mime string */ \ 162 char sbuf[64]; /* type suffix string */ \ 163 char tbuf[2 * PATH_MAX]; /* type string */ \ 164 Cctype_t cctype[UCHAR_MAX + 1]; /* char code types */ \ 165 unsigned int count[UCHAR_MAX + 1]; /* char frequency count */ \ 166 unsigned int multi[UCHAR_MAX + 1]; /* muti char count */ \ 167 int keep[MAXNEST]; /* ckmagic nest stack */ \ 168 char* cap[MAXNEST]; /* ckmagic mime stack */ \ 169 char* msg[MAXNEST]; /* ckmagic text stack */ \ 170 Entry_t* ret[MAXNEST]; /* ckmagic return stack */ \ 171 int fbsz; /* fbuf size */ \ 172 int fbmx; /* fbuf max size */ \ 173 int xbsz; /* xbuf size */ \ 174 int swap; /* swap() operation */ \ 175 unsigned long flags; /* disc+open flags */ \ 176 long xoff; /* xbuf offset */ \ 177 int identifier[ID_MAX + 1]; /* Info_t identifier */ \ 178 Sfio_t* fp; /* fbuf fp */ \ 179 Sfio_t* tmp; /* tmp string */ \ 180 regdisc_t redisc; /* regex discipline */ \ 181 Dtdisc_t dtdisc; /* dict 
discipline */ \ 182 Dt_t* idtab; /* identifier dict */ \ 183 Dt_t* infotab; /* info keyword dict */ 184 185#include <magic.h> 186 187static Info_t dict[] = /* keyword dictionary */ 188{ 189 { "COMMON", ID_FORTRAN }, 190 { "COMPUTE", ID_COBOL }, 191 { "COMP", ID_COPYBOOK }, 192 { "COMPUTATIONAL",ID_COPYBOOK }, 193 { "DCL", ID_PL1 }, 194 { "DEFINED", ID_PL1 }, 195 { "DIMENSION", ID_FORTRAN }, 196 { "DIVISION", ID_COBOL }, 197 { "FILLER", ID_COPYBOOK }, 198 { "FIXED", ID_PL1 }, 199 { "FUNCTION", ID_FORTRAN }, 200 { "HTML", ID_HTML }, 201 { "INTEGER", ID_FORTRAN }, 202 { "MAIN", ID_PL1 }, 203 { "OPTIONS", ID_PL1 }, 204 { "PERFORM", ID_COBOL }, 205 { "PIC", ID_COPYBOOK }, 206 { "REAL", ID_FORTRAN }, 207 { "REDEFINES", ID_COPYBOOK }, 208 { "S9", ID_COPYBOOK }, 209 { "SECTION", ID_COBOL }, 210 { "SELECT", ID_COBOL }, 211 { "SUBROUTINE", ID_FORTRAN }, 212 { "TEXT", ID_ASM }, 213 { "VALUE", ID_COPYBOOK }, 214 { "attr", ID_MAM3 }, 215 { "binary", ID_YACC }, 216 { "block", ID_FORTRAN }, 217 { "bss", ID_ASM }, 218 { "byte", ID_ASM }, 219 { "char", ID_C }, 220 { "class", ID_CPLUSPLUS }, 221 { "clr", ID_NOTEXT }, 222 { "comm", ID_ASM }, 223 { "common", ID_FORTRAN }, 224 { "data", ID_ASM }, 225 { "dimension", ID_FORTRAN }, 226 { "done", ID_MAM2 }, 227 { "double", ID_C }, 228 { "even", ID_ASM }, 229 { "exec", ID_MAM3 }, 230 { "extern", ID_C }, 231 { "float", ID_C }, 232 { "function", ID_FORTRAN }, 233 { "globl", ID_ASM }, 234 { "h", ID_INCL3 }, 235 { "html", ID_HTML }, 236 { "include", ID_INCL1 }, 237 { "int", ID_C }, 238 { "integer", ID_FORTRAN }, 239 { "jmp", ID_NOTEXT }, 240 { "left", ID_YACC }, 241 { "libc", ID_INCL2 }, 242 { "long", ID_C }, 243 { "make", ID_MAM1 }, 244 { "mov", ID_NOTEXT }, 245 { "private", ID_CPLUSPLUS }, 246 { "public", ID_CPLUSPLUS }, 247 { "real", ID_FORTRAN }, 248 { "register", ID_C }, 249 { "right", ID_YACC }, 250 { "sfio", ID_INCL2 }, 251 { "static", ID_C }, 252 { "stdio", ID_INCL2 }, 253 { "struct", ID_C }, 254 { "subroutine", ID_FORTRAN }, 255 { 
"sys", ID_NOTEXT }, 256 { "term", ID_YACC }, 257 { "text", ID_ASM }, 258 { "tst", ID_NOTEXT }, 259 { "type", ID_YACC }, 260 { "typedef", ID_C }, 261 { "u", ID_INCL2 }, 262 { "union", ID_YACC }, 263 { "void", ID_C }, 264}; 265 266static Info_t info[] = 267{ 268 { "atime", INFO_atime }, 269 { "blocks", INFO_blocks }, 270 { "ctime", INFO_ctime }, 271 { "fstype", INFO_fstype }, 272 { "gid", INFO_gid }, 273 { "mode", INFO_mode }, 274 { "mtime", INFO_mtime }, 275 { "name", INFO_name }, 276 { "nlink", INFO_nlink }, 277 { "size", INFO_size }, 278 { "uid", INFO_uid }, 279}; 280 281/* 282 * return pointer to data at offset off and size siz 283 */ 284 285static char* 286getdata(register Magic_t* mp, register long off, register int siz) 287{ 288 register long n; 289 290 if (off < 0) 291 return 0; 292 if (off + siz <= mp->fbsz) 293 return mp->fbuf + off; 294 if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz) 295 { 296 if (off + siz > mp->fbmx) 297 return 0; 298 n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2); 299 if (sfseek(mp->fp, n, SEEK_SET) != n) 300 return 0; 301 if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0) 302 { 303 mp->xoff = 0; 304 mp->xbsz = 0; 305 return 0; 306 } 307 mp->xbuf[mp->xbsz] = 0; 308 mp->xoff = n; 309 if (off + siz > mp->xoff + mp->xbsz) 310 return 0; 311 } 312 return mp->xbuf + off - mp->xoff; 313} 314 315/* 316 * @... evaluator for strexpr() 317 */ 318 319static long 320indirect(const char* cs, char** e, void* handle) 321{ 322 register char* s = (char*)cs; 323 register Magic_t* mp = (Magic_t*)handle; 324 register long n = 0; 325 register char* p; 326 327 if (s) 328 { 329 if (*s == '@') 330 { 331 n = *++s == '(' ? 
strexpr(s, e, indirect, mp) : strtol(s, e, 0); 332 switch (*(s = *e)) 333 { 334 case 'b': 335 case 'B': 336 s++; 337 if (p = getdata(mp, n, 1)) 338 n = *(unsigned char*)p; 339 else 340 s = (char*)cs; 341 break; 342 case 'h': 343 case 'H': 344 s++; 345 if (p = getdata(mp, n, 2)) 346 n = swapget(mp->swap, p, 2); 347 else 348 s = (char*)cs; 349 break; 350 case 'q': 351 case 'Q': 352 s++; 353 if (p = getdata(mp, n, 8)) 354 n = swapget(mp->swap, p, 8); 355 else 356 s = (char*)cs; 357 break; 358 default: 359 if (isalnum(*s)) 360 s++; 361 if (p = getdata(mp, n, 4)) 362 n = swapget(mp->swap, p, 4); 363 else 364 s = (char*)cs; 365 break; 366 } 367 } 368 *e = s; 369 } 370 else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 371 (*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e); 372 return n; 373} 374 375/* 376 * emit regex error message 377 */ 378 379static void 380regmessage(Magic_t* mp, regex_t* re, int code) 381{ 382 char buf[128]; 383 384 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 385 { 386 regerror(code, re, buf, sizeof(buf)); 387 (*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf); 388 } 389} 390 391/* 392 * decompose vcodex(3) method composition 393 */ 394 395static char* 396vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x) 397{ 398 unsigned char* map; 399 const char* o; 400 int c; 401 int n; 402 int i; 403 int a; 404 405 map = CCMAP(CC_ASCII, CC_NATIVE); 406 a = 0; 407 i = 1; 408 for (;;) 409 { 410 if (i) 411 i = 0; 412 else 413 *b++ = '^'; 414 if (m < (x - 1) && !*(m + 1)) 415 { 416 /* 417 * obsolete indices 418 */ 419 420 if (!a) 421 { 422 a = 1; 423 o = "old, "; 424 while (b < e && (c = *o++)) 425 *b++ = c; 426 } 427 switch (*m) 428 { 429 case 0: o = "delta"; break; 430 case 1: o = "huffman"; break; 431 case 2: o = "huffgroup"; break; 432 case 3: o = "arith"; break; 433 case 4: o = "bwt"; break; 434 case 5: o = "rle"; break; 435 case 6: o = "mtf"; break; 436 case 7: o = "transpose"; break; 437 case 8: o = "table"; 
break; 438 case 9: o = "huffpart"; break; 439 case 50: o = "map"; break; 440 case 100: o = "recfm"; break; 441 case 101: o = "ss7"; break; 442 default: o = "UNKNOWN"; break; 443 } 444 m += 2; 445 while (b < e && (c = *o++)) 446 *b++ = c; 447 } 448 else 449 while (b < e && m < x && (c = *m++)) 450 { 451 if (map) 452 c = map[c]; 453 *b++ = c; 454 } 455 if (b >= e) 456 break; 457 n = 0; 458 while (m < x) 459 { 460 n = (n<<7) | (*m & 0x7f); 461 if (!(*m++ & 0x80)) 462 break; 463 } 464 if (n >= (x - m)) 465 break; 466 m += n; 467 } 468 return b; 469} 470 471/* 472 * check for magic table match in buf 473 */ 474 475static char* 476ckmagic(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st, unsigned long off) 477{ 478 register Entry_t* ep; 479 register char* p; 480 register char* b; 481 register int level = 0; 482 int call = -1; 483 int all = 0; 484 int c; 485 int str; 486 char* q; 487 char* t; 488 char* cur; 489 char* base = 0; 490 unsigned long num; 491 unsigned long mask; 492 regmatch_t matches[10]; 493 494 mp->swap = 0; 495 b = mp->msg[0] = cur = buf; 496 mp->mime = mp->cap[0] = 0; 497 mp->keep[0] = 0; 498 for (ep = mp->magic; ep; ep = ep->next) 499 { 500 fun: 501 if (ep->nest == '{') 502 { 503 if (++level >= MAXNEST) 504 { 505 call = -1; 506 level = 0; 507 mp->keep[0] = 0; 508 b = mp->msg[0]; 509 mp->mime = mp->cap[0]; 510 continue; 511 } 512 mp->keep[level] = mp->keep[level - 1] != 0; 513 mp->msg[level] = b; 514 mp->cap[level] = mp->mime; 515 } 516 switch (ep->cont) 517 { 518 case '#': 519 if (mp->keep[level] && b > cur) 520 { 521 if ((mp->flags & MAGIC_ALL) && b < (end - 3)) 522 { 523 all = 1; 524 *b++ = '\n'; 525 cur = b; 526 continue; 527 } 528 *b = 0; 529 return buf; 530 } 531 mp->swap = 0; 532 b = mp->msg[0] = cur; 533 mp->mime = mp->cap[0] = 0; 534 if (ep->type == ' ') 535 continue; 536 break; 537 case '$': 538 if (mp->keep[level] && call < (MAXNEST - 1)) 539 { 540 mp->ret[++call] = ep; 541 ep = ep->value.lab; 542 goto fun; 543 } 544 
continue; 545 case ':': 546 ep = mp->ret[call--]; 547 if (ep->op == 'l') 548 goto fun; 549 continue; 550 case '|': 551 if (mp->keep[level] > 1) 552 goto checknest; 553 /*FALLTHROUGH*/ 554 default: 555 if (!mp->keep[level]) 556 { 557 b = mp->msg[level]; 558 mp->mime = mp->cap[level]; 559 goto checknest; 560 } 561 break; 562 } 563 p = ""; 564 num = 0; 565 if (!ep->expr) 566 num = ep->offset + off; 567 else 568 switch (ep->offset) 569 { 570 case 0: 571 num = strexpr(ep->expr, NiL, indirect, mp) + off; 572 break; 573 case INFO_atime: 574 num = st->st_atime; 575 ep->type = 'D'; 576 break; 577 case INFO_blocks: 578 num = iblocks(st); 579 ep->type = 'N'; 580 break; 581 case INFO_ctime: 582 num = st->st_ctime; 583 ep->type = 'D'; 584 break; 585 case INFO_fstype: 586 p = fmtfs(st); 587 ep->type = toupper(ep->type); 588 break; 589 case INFO_gid: 590 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 591 { 592 p = fmtgid(st->st_gid); 593 ep->type = toupper(ep->type); 594 } 595 else 596 { 597 num = st->st_gid; 598 ep->type = 'N'; 599 } 600 break; 601 case INFO_mode: 602 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 603 { 604 p = fmtmode(st->st_mode, 0); 605 ep->type = toupper(ep->type); 606 } 607 else 608 { 609 num = modex(st->st_mode); 610 ep->type = 'N'; 611 } 612 break; 613 case INFO_mtime: 614 num = st->st_ctime; 615 ep->type = 'D'; 616 break; 617 case INFO_name: 618 if (!base) 619 { 620 if (base = strrchr(file, '/')) 621 base++; 622 else 623 base = (char*)file; 624 } 625 p = base; 626 ep->type = toupper(ep->type); 627 break; 628 case INFO_nlink: 629 num = st->st_nlink; 630 ep->type = 'N'; 631 break; 632 case INFO_size: 633 num = st->st_size; 634 ep->type = 'N'; 635 break; 636 case INFO_uid: 637 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 638 { 639 p = fmtuid(st->st_uid); 640 ep->type = toupper(ep->type); 641 } 642 else 643 { 644 num = st->st_uid; 645 ep->type = 'N'; 646 } 647 break; 648 } 649 switch (ep->type) 650 { 651 652 case 'b': 
653 if (!(p = getdata(mp, num, 1))) 654 goto next; 655 num = *(unsigned char*)p; 656 break; 657 658 case 'h': 659 if (!(p = getdata(mp, num, 2))) 660 goto next; 661 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2); 662 break; 663 664 case 'd': 665 case 'l': 666 case 'v': 667 if (!(p = getdata(mp, num, 4))) 668 goto next; 669 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4); 670 break; 671 672 case 'q': 673 if (!(p = getdata(mp, num, 8))) 674 goto next; 675 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8); 676 break; 677 678 case 'e': 679 if (!(p = getdata(mp, num, 0))) 680 goto next; 681 /*FALLTHROUGH*/ 682 case 'E': 683 if (!ep->value.sub) 684 goto next; 685 if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches))) 686 { 687 c = mp->fbsz; 688 if (c >= sizeof(mp->nbuf)) 689 c = sizeof(mp->nbuf) - 1; 690 p = (char*)memcpy(mp->nbuf, p, c); 691 p[c] = 0; 692 ccmapstr(mp->x2n, p, c); 693 if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches))) 694 { 695 if (c != REG_NOMATCH) 696 regmessage(mp, ep->value.sub, c); 697 goto next; 698 } 699 } 700 p = ep->value.sub->re_sub->re_buf; 701 q = T(ep->desc); 702 t = *q ? q : p; 703 if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b') 704 *b++ = ' '; 705 b += sfsprintf(b, end - b, *q ? q : "%s", p + (*p == '\b')); 706 if (ep->mime) 707 mp->mime = ep->mime; 708 goto checknest; 709 710 case 's': 711 if (!(p = getdata(mp, num, ep->mask))) 712 goto next; 713 goto checkstr; 714 case 'm': 715 if (!(p = getdata(mp, num, 0))) 716 goto next; 717 /*FALLTHROUGH*/ 718 case 'M': 719 case 'S': 720 checkstr: 721 for (;;) 722 { 723 if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p)) 724 break; 725 if ((ep->type == 'm' || ep->type == 'M') ? 
strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask)) 726 break; 727 if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf)) 728 goto next; 729 p = (char*)memcpy(mp->nbuf, p, ep->mask); 730 p[ep->mask] = 0; 731 ccmapstr(mp->x2n, p, ep->mask); 732 } 733 q = T(ep->desc); 734 if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b') 735 *b++ = ' '; 736 for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++); 737 *t = 0; 738 b += sfsprintf(b, end - b, q + (*q == '\b'), p); 739 *t = c; 740 if (ep->mime) 741 mp->mime = ep->mime; 742 goto checknest; 743 744 } 745 if (mask = ep->mask) 746 num &= mask; 747 switch (ep->op) 748 { 749 750 case '=': 751 case '@': 752 if (num == ep->value.num) 753 break; 754 if (ep->cont != '#') 755 goto next; 756 if (!mask) 757 mask = ~mask; 758 if (ep->type == 'h') 759 { 760 if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num) 761 { 762 if (!(mp->swap & (mp->swap + 1))) 763 mp->swap = 7; 764 goto swapped; 765 } 766 } 767 else if (ep->type == 'l') 768 { 769 for (c = 1; c < 4; c++) 770 if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num) 771 { 772 if (!(mp->swap & (mp->swap + 1))) 773 mp->swap = 7; 774 goto swapped; 775 } 776 } 777 else if (ep->type == 'q') 778 { 779 for (c = 1; c < 8; c++) 780 if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num) 781 goto swapped; 782 } 783 goto next; 784 785 case '!': 786 if (num != ep->value.num) 787 break; 788 goto next; 789 790 case '^': 791 if (num ^ ep->value.num) 792 break; 793 goto next; 794 795 case '>': 796 if (num > ep->value.num) 797 break; 798 goto next; 799 800 case '<': 801 if (num < ep->value.num) 802 break; 803 goto next; 804 805 case 'l': 806 if (num > 0 && mp->keep[level] && call < (MAXNEST - 1)) 807 { 808 if (!ep->value.loop->count) 809 { 810 ep->value.loop->count = num; 811 ep->value.loop->offset = off; 812 off = ep->value.loop->start; 813 } 814 else if (!--ep->value.loop->count) 
815 { 816 off = ep->value.loop->offset; 817 goto next; 818 } 819 else 820 off += ep->value.loop->size; 821 mp->ret[++call] = ep; 822 ep = ep->value.loop->lab; 823 goto fun; 824 } 825 goto next; 826 827 case 'm': 828 c = mp->swap; 829 t = ckmagic(mp, file, b + (b > cur), end, st, num); 830 mp->swap = c; 831 if (t) 832 { 833 if (b > cur && b < end) 834 *b = ' '; 835 b += strlen(b); 836 } 837 else if (ep->cont == '&') 838 goto next; 839 break; 840 841 case 'r': 842#if _UWIN 843 { 844 char* e; 845 Sfio_t* rp; 846 Sfio_t* gp; 847 848 if (!(t = strrchr(file, '.'))) 849 goto next; 850 sfprintf(mp->tmp, "/reg/classes_root/%s", t); 851 if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r"))) 852 goto next; 853 *ep->desc = 0; 854 *ep->mime = 0; 855 gp = 0; 856 while (t = sfgetr(rp, '\n', 1)) 857 { 858 if (strneq(t, "Content Type=", 13)) 859 { 860 ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0); 861 strcpy(ep->mime, t + 13); 862 if (gp) 863 break; 864 } 865 else 866 { 867 sfprintf(mp->tmp, "/reg/classes_root/%s", t); 868 if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r"))) 869 { 870 ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1); 871 strcpy(ep->desc, t); 872 if (*ep->mime) 873 break; 874 } 875 } 876 } 877 sfclose(rp); 878 if (!gp) 879 goto next; 880 if (!*ep->mime) 881 { 882 t = T(ep->desc); 883 if (!strncasecmp(t, "microsoft", 9)) 884 t += 9; 885 while (isspace(*t)) 886 t++; 887 e = "application/x-ms-"; 888 ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e)); 889 e = strcopy(ep->mime, e); 890 while ((c = *t++) && c != '.' && c != ' ') 891 *e++ = isupper(c) ? 
tolower(c) : c; 892 *e = 0; 893 } 894 while (t = sfgetr(gp, '\n', 1)) 895 if (*t && !streq(t, "\"\"")) 896 { 897 ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0); 898 strcpy(ep->desc, t); 899 break; 900 } 901 sfclose(gp); 902 if (!*ep->desc) 903 goto next; 904 if (!t) 905 for (t = T(ep->desc); *t; t++) 906 if (*t == '.') 907 *t = ' '; 908 if (!mp->keep[level]) 909 mp->keep[level] = 2; 910 mp->mime = ep->mime; 911 break; 912 } 913#else 914 if (ep->cont == '#' && !mp->keep[level]) 915 mp->keep[level] = 1; 916 goto next; 917#endif 918 919 case 'v': 920 if (!(p = getdata(mp, num, 4))) 921 goto next; 922 c = 0; 923 do 924 { 925 num++; 926 c = (c<<7) | (*p & 0x7f); 927 } while (*p++ & 0x80); 928 if (!(p = getdata(mp, num, c))) 929 goto next; 930 if (mp->keep[level]++ && b > cur && b < (end - 1) && *(b - 1) != ' ') 931 { 932 *b++ = ','; 933 *b++ = ' '; 934 } 935 b = vcdecomp(b, cur + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c); 936 goto checknest; 937 938 } 939 swapped: 940 q = T(ep->desc); 941 if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' 
&& *q != '\b') 942 *b++ = ' '; 943 if (*q == '\b') 944 q++; 945 str = 0; 946 for (t = q; *t; t++) 947 if (*t == '%' && (c = *(t + 1))) 948 { 949 if (c == '%') 950 t++; 951 else 952 while (c && c != '%') 953 { 954 if (c == 's') 955 { 956 str = 1; 957 break; 958 } 959 t++; 960 c = *(t + 1); 961 } 962 } 963 if (!str) 964 b += sfsprintf(b, end - b, q, num, 0, 0, 0, 0, 0, 0, 0); 965 else if (ep->type == 'd' || ep->type == 'D') 966 b += sfsprintf(b, end - b, q, fmttime("%?%QL", (time_t)num), 0, 0, 0, 0, 0, 0, 0); 967 else if (ep->type == 'v') 968 b += sfsprintf(b, end - b, q, fmtversion(num), 0, 0, 0, 0, 0, 0, 0); 969 else 970 b += sfsprintf(b, end - b, q, fmtnum(num, 0), 0, 0, 0, 0, 0, 0, 0); 971 if (ep->mime && *ep->mime) 972 mp->mime = ep->mime; 973 checknest: 974 if (ep->nest == '}') 975 { 976 if (!mp->keep[level]) 977 { 978 b = mp->msg[level]; 979 mp->mime = mp->cap[level]; 980 } 981 else if (level > 0) 982 mp->keep[level - 1] = mp->keep[level]; 983 if (--level < 0) 984 { 985 level = 0; 986 mp->keep[0] = 0; 987 } 988 } 989 continue; 990 next: 991 if (ep->cont == '&') 992 mp->keep[level] = 0; 993 goto checknest; 994 } 995 if (all && b-- || mp->keep[level] && b > cur) 996 { 997 *b = 0; 998 return buf; 999 } 1000 return 0; 1001} 1002 1003/* 1004 * check english language stats 1005 */ 1006 1007static int 1008ckenglish(register Magic_t* mp, int pun, int badpun) 1009{ 1010 register char* s; 1011 register int vowl = 0; 1012 register int freq = 0; 1013 register int rare = 0; 1014 1015 if (5 * badpun > pun) 1016 return 0; 1017 if (2 * mp->count[';'] > mp->count['E'] + mp->count['e']) 1018 return 0; 1019 if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e']) 1020 return 0; 1021 for (s = "aeiou"; *s; s++) 1022 vowl += mp->count[toupper(*s)] + mp->count[*s]; 1023 for (s = "etaion"; *s; s++) 1024 freq += mp->count[toupper(*s)] + mp->count[*s]; 1025 for (s = "vjkqxz"; *s; s++) 1026 rare += mp->count[toupper(*s)] + mp->count[*s]; 1027 return 5 * 
vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare;
}

/*
 * check programming language stats
 *
 * the data in mp->fbuf is classified by character coding, #! line,
 * file name suffix, and keyword/punctuation statistics
 * a description string is returned, either a constant string,
 * mp->sbuf, or formatted into buf, and mp->mime is set to match
 */

static char*
cklang(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st)
{
	register int		c;
	register unsigned char*	b;
	register unsigned char*	e;
	register int		q;
	register char*		s;
	char*			t;
	char*			base;
	char*			suff;
	char*			t1;
	char*			t2;
	char*			t3;
	int			n;
	int			badpun;
	int			code;
	int			pun;
	Cctype_t		flags;
	Info_t*			ip;

	b = (unsigned char*)mp->fbuf;
	e = b + mp->fbsz;
	memzero(mp->count, sizeof(mp->count));
	memzero(mp->multi, sizeof(mp->multi));
	memzero(mp->identifier, sizeof(mp->identifier));

	/*
	 * check character coding
	 */

	flags = 0;
	while (b < e)
		flags |= mp->cctype[*b++];
	b = (unsigned char*)mp->fbuf;
	code = 0;
	q = CC_ASCII;
	n = CC_MASK;
	/* pick the code set q with the smallest CC_* flag value n */
	for (c = 0; c < CC_MAPS; c++)
	{
		flags ^= CC_text;
		if ((flags & CC_MASK) < n)
		{
			n = flags & CC_MASK;
			q = c;
		}
		flags >>= CC_BIT;
	}
	flags = n;
	if (!(flags & (CC_binary|CC_notext)))
	{
		if (q != CC_NATIVE)
		{
			code = q;
			ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE);
		}
		if (b[0] == '#' && b[1] == '!')
		{
			/* s is the interpreter path, temporarily terminated at b */
			for (b += 2; b < e && isspace(*b); b++);
			for (s = (char*)b; b < e && isprint(*b); b++);
			c = *b;
			*b = 0;
			if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
			{
				if (t = strrchr(s, '/'))
					s = t + 1;
				for (t = s; *t; t++)
					if (isspace(*t))
					{
						*t = 0;
						break;
					}
				sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
				mp->mime = mp->mbuf;
				if (match(s, "*sh"))
				{
					t1 = T("command");
					if (streq(s, "sh"))
						*s = 0;
					else
					{
						*b++ = ' ';
						*b = 0;
					}
				}
				else
				{
					t1 = T("interpreter");
					*b++ = ' ';
					*b = 0;
				}
				sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
				s = mp->sbuf;
				goto qualify;
			}
			*b = c;
			b = (unsigned char*)mp->fbuf;
		}

		/*
		 * gather char counts in mp->count, construct counts in
		 * mp->multi, and keyword counts in mp->identifier
		 * q is the current quote/comment terminator, 0 if none
		 * s is the start of the current identifier, 0 if none
		 */

		badpun = 0;
		pun = 0;
		q = 0;
		s = 0;
		t = 0;
		while (b < e)
		{
			c = *b++;
			mp->count[c]++;
			if (c == q && (q != '*' || *b == '/' && b++))
			{
				mp->multi[q]++;
				q = 0;
			}
			else if (c == '\\')
			{
				s = 0;
				b++;
			}
			else if (!q)
			{
				if (isalpha(c) || c == '_')
				{
					if (!s)
						s = (char*)b - 1;
				}
				else if (!isdigit(c))
				{
					if (s)
					{
						if (s > mp->fbuf)
							switch (*(s - 1))
							{
							case ':':
								if (*b == ':')
									mp->multi[':']++;
								break;
							case '.':
								if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
									mp->multi['.']++;
								break;
							case '\n':
							case '\\':
								if (*b == '{')
									t = (char*)b + 1;
								break;
							case '{':
								if (s == t && *b == '}')
									mp->multi['X']++;
								break;
							}
						/* keyword dictionary is created on first use */
						if (!mp->idtab)
						{
							if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dthash))
								for (q = 0; q < elementsof(dict); q++)
									dtinsert(mp->idtab, &dict[q]);
							else if (mp->disc->errorf)
								(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
							q = 0;
						}
						if (mp->idtab)
						{
							/* temporarily terminate the identifier for the lookup */
							*(b - 1) = 0;
							if (ip = (Info_t*)dtmatch(mp->idtab, s))
								mp->identifier[ip->value]++;
							*(b - 1) = c;
						}
						s = 0;
					}
					switch (c)
					{
					case '\t':
						if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
							mp->multi['\t']++;
						break;
					case '"':
					case '\'':
						q = c;
						break;
					case '/':
						if (*b == '*')
							q = *b++;
						else if (*b == '/')
							q = '\n';
						break;
					case '$':
						if (*b == '(' && *(b + 1) != ' ')
							mp->multi['$']++;
						break;
					case '{':
					case '}':
					case '[':
					case ']':
					case '(':
						mp->multi[c]++;
						break;
					case ')':
						mp->multi[c]++;
						goto punctuation;
					case ':':
						if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
							mp->multi[':']++;
						goto punctuation;
					case '.':
					case ',':
					case '%':
					case ';':
					case '?':
					punctuation:
						/* punctuation not followed by space or newline is "bad" */
						pun++;
						if (*b != ' ' && *b != '\n')
							badpun++;
						break;
					}
				}
			}
		}
	}
	else
		while (b < e)
			mp->count[*b++]++;
	base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
	suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
	if (!flags)
	{
		/*
		 * file name checks first, then content statistics
		 */

		if (match(suff, "*sh|bat|cmd"))
			goto id_sh;
		if (match(base, "*@(mkfile)"))
			goto id_mk;
		if (match(base, "*@(makefile|.mk)"))
			goto id_make;
		if (match(base, "*@(mamfile|.mam)"))
			goto id_mam;
		if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
			goto id_c;
		if (match(suff, "f"))
			goto id_fortran;
		if (match(suff, "htm+(l)"))
			goto id_html;
		if (match(suff, "cpy"))
			goto id_copybook;
		if (match(suff, "cob|cbl|cb2"))
			goto id_cobol;
		if (match(suff, "pl[1i]"))
			goto id_pl1;
		if (match(suff, "tex"))
			goto id_tex;
		if (match(suff, "asm|s"))
			goto id_asm;
		if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
		{
		id_sh:
			s = T("command script");
			mp->mime = "application/sh";
			goto qualify;
		}
		if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
		{
			s = T("mail message");
			mp->mime = "message/rfc822";
			goto qualify;
		}
		if (match(base, "*@(mkfile)"))
		{
		id_mk:
			s = "mkfile";
			mp->mime = "application/mk";
			goto qualify;
		}
		if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
		{
		id_make:
			s = "makefile";
			mp->mime = "application/make";
			goto qualify;
		}
		if (mp->multi['.'] >= 3)
		{
			s = T("nroff input");
			mp->mime = "application/x-troff";
			goto qualify;
		}
		if (mp->multi['X'] >= 3)
		{
			s = T("TeX input");
			mp->mime = "application/x-tex";
			goto qualify;
		}
		/* balanced brackets, or no excess closers if the data fills a buffer */
		if (mp->fbsz < SF_BUFSIZE &&
		    (mp->multi['('] == mp->multi[')'] &&
		     mp->multi['{'] == mp->multi['}'] &&
		     mp->multi['['] == mp->multi[']']) ||
		    mp->fbsz >= SF_BUFSIZE &&
		    (mp->multi['('] >= mp->multi[')'] &&
		     mp->multi['{'] >= mp->multi['}'] &&
		     mp->multi['['] >= mp->multi[']']))
		{
			c = mp->identifier[ID_INCL1];
			if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
			    mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
			    mp->count['='] >= 20 && mp->count[';'] >= 20)
			{
			id_c:
				t1 = "";
				t2 = "c ";
				t3 = T("program");
				switch (*suff)
				{
				case 'c':
				case 'C':
					mp->mime = "application/x-cc";
					break;
				case 'l':
				case 'L':
					t1 = "lex ";
					mp->mime = "application/x-lex";
					break;
				default:
					t3 = T("header");
					if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
					{
						mp->mime = "application/x-cc";
						break;
					}
					/*FALLTHROUGH*/
				case 'y':
				case 'Y':
					t1 = "yacc ";
					mp->mime = "application/x-yacc";
					break;
				}
				if (mp->identifier[ID_CPLUSPLUS] >= 3)
				{
					t2 = "c++ ";
					mp->mime = "application/x-c++";
				}
				sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
				s = mp->sbuf;
				goto qualify;
			}
		}
		if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
		    (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
		     mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
		{
		id_mam:
			s = T("mam program");
			mp->mime = "application/x-mam";
			goto qualify;
		}
		if (mp->identifier[ID_FORTRAN] >= 8)
		{
		id_fortran:
			s = T("fortran program");
			mp->mime = "application/x-fortran";
			goto qualify;
		}
		if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
		{
		id_html:
			s = T("html input");
			mp->mime = "text/html";
			goto qualify;
		}
		if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
		{
		id_copybook:
			s = T("cobol copybook");
			mp->mime = "application/x-cobol";
			goto qualify;
		}
		if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
		{
		id_cobol:
			s = T("cobol program");
			mp->mime = "application/x-cobol";
			goto qualify;
		}
		if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
		{
		id_pl1:
			s = T("pl1 program");
			mp->mime = "application/x-pl1";
			goto qualify;
		}
		if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
		{
		id_tex:
			s = T("TeX input");
			mp->mime = "text/tex";
			goto qualify;
		}
		if (mp->identifier[ID_ASM] >= 4)
		{
		id_asm:
			s = T("as program");
			mp->mime = "application/x-as";
			goto qualify;
		}
		if (ckenglish(mp, pun, badpun))
		{
			s = T("english text");
			mp->mime = "text/plain";
			goto qualify;
		}
	}
	else if (streq(base, "core"))
	{
		mp->mime = "x-system/core";
		return T("core dump");
	}
	if (flags & (CC_binary|CC_notext))
	{
		/*
		 * scan for utf-8 multi-byte sequences
		 * q is the number of leading 1 bits in the lead byte
		 * if the scan reaches the end of the data and at least
		 * one sequence was seen (n) the data is marked CC_utf_8
		 */

		b = (unsigned char*)mp->fbuf;
		e = b + mp->fbsz;
		n = 0;
		for (;;)
		{
			c = *b++;
			q = 0;
			while (c & 0x80)
			{
				c <<= 1;
				q++;
			}
			switch (q)
			{
			case 4:
				if (b < e && (*b++ & 0xc0) != 0x80)
					break;
			case 3:
				if (b < e && (*b++ & 0xc0) != 0x80)
					break;
			case 2:
				if (b < e && (*b++ & 0xc0) != 0x80)
					break;
				n = 1;
			case 0:
				if (b >= e)
				{
					if (n)
					{
						flags &= ~(CC_binary|CC_notext);
						flags |= CC_utf_8;
					}
					break;
				}
				continue;
			}
			break;
		}
	}
	if (flags & (CC_binary|CC_notext))
	{
		unsigned long	d = 0;

		if ((q = mp->fbsz / UCHAR_MAX) >= 2)
		{
			/*
			 * compression/encryption via standard deviation
			 */


			for (c = 0; c < UCHAR_MAX; c++)
			{
				pun = mp->count[c] - q;
				d += pun * pun;
			}
			d /= mp->fbsz;
		}
		if (d <= 0)
			s = T("binary");
		else if (d < 4)
			s = T("encrypted");
		else if (d < 16)
			s = T("packed");
		else if (d < 64)
			s = T("compressed");
		else if (d < 256)
			s = T("delta");
		else
			s = T("data");
		mp->mime = "application/octet-stream";
		return s;
	}
	mp->mime = "text/plain";
	if (flags & CC_utf_8)
		s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
	else if (flags & CC_latin)
		s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
	else
		s = (flags & CC_control) ? T("text with control characters") : T("text");
 qualify:
	/* prefix the description with the line ending and code set qualifiers */
	if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r'])
	{
		t = "dos ";
		mp->mime = "text/dos";
	}
	else
		t = "";
	if (code)
	{
		if (code == CC_ASCII)
			sfsprintf(buf, end - buf, "ascii %s%s", t, s);
		else
		{
			sfsprintf(buf, end - buf, "ebcdic%d %s%s", code - 1, t, s);
			mp->mime = "text/ebcdic";
		}
		s = buf;
	}
	else if (*t)
	{
		sfsprintf(buf, end - buf, "%s%s", t, s);
		s = buf;
	}
	return s;
}

/*
 * return the basic magic string for file,st in buf,size
 */

static char*
type(register Magic_t* mp, const char* file, struct stat* st, char* buf, char* end)
{
	register char*	s;
	register char*	t;

	mp->mime = 0;
	if (!S_ISREG(st->st_mode))
	{
		if (S_ISDIR(st->st_mode))
		{
			mp->mime = "x-system/dir";
			return T("directory");
		}
		if (S_ISLNK(st->st_mode))
		{
			mp->mime = "x-system/lnk";
			s = buf;
			s += sfsprintf(s, end - s, T("symbolic link to "));
			if (pathgetlink(file, s, end - s) < 0)
				return T("cannot read symbolic link text");
			return buf;
		}
		if (S_ISBLK(st->st_mode))
		{
			mp->mime = "x-system/blk";
			sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st));
			return buf;
		}
		if (S_ISCHR(st->st_mode))
		{
			mp->mime = "x-system/chr";
			sfsprintf(buf, end - buf, T("character special (%s)"), fmtdev(st));
			return buf;
		}
		if (S_ISFIFO(st->st_mode))
		{
			mp->mime = "x-system/fifo";
			return "fifo";
		}
#ifdef S_ISSOCK
		if (S_ISSOCK(st->st_mode))
		{
			mp->mime = "x-system/sock";
			return "socket";
		}
#endif
	}
	if (!(mp->fbmx = st->st_size))
		s = T("empty");
	else if (!mp->fp)
		s = T("cannot read");
	else
	{
		mp->fbsz =
sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1); 1610 if (mp->fbsz < 0) 1611 s = fmterror(errno); 1612 else if (mp->fbsz == 0) 1613 s = T("empty"); 1614 else 1615 { 1616 mp->fbuf[mp->fbsz] = 0; 1617 mp->xoff = 0; 1618 mp->xbsz = 0; 1619 if (!(s = ckmagic(mp, file, buf, end, st, 0))) 1620 s = cklang(mp, file, buf, end, st); 1621 } 1622 } 1623 if (!mp->mime) 1624 mp->mime = "application/unknown"; 1625 else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2)) 1626 { 1627 register char* b; 1628 register char* be; 1629 register char* m; 1630 register char* me; 1631 1632 b = mp->mime; 1633 me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1; 1634 while (m < me && b < t) 1635 *m++ = *b++; 1636 b = t = s; 1637 for (;;) 1638 { 1639 if (!(be = strchr(t, ' '))) 1640 { 1641 be = b + strlen(b); 1642 break; 1643 } 1644 if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4)) 1645 break; 1646 b = t; 1647 t = be + 1; 1648 } 1649 while (m < me && b < be) 1650 if ((*m++ = *b++) == ' ') 1651 *(m - 1) = '-'; 1652 *m = 0; 1653 } 1654 return s; 1655} 1656 1657/* 1658 * low level for magicload() 1659 */ 1660 1661static int 1662load(register Magic_t* mp, char* file, register Sfio_t* fp) 1663{ 1664 register Entry_t* ep; 1665 register char* p; 1666 register char* p2; 1667 char* p3; 1668 char* next; 1669 int n; 1670 int lge; 1671 int lev; 1672 int ent; 1673 int old; 1674 int cont; 1675 Info_t* ip; 1676 Entry_t* ret; 1677 Entry_t* first; 1678 Entry_t* last = 0; 1679 Entry_t* fun['z' - 'a' + 1]; 1680 1681 memzero(fun, sizeof(fun)); 1682 cont = '$'; 1683 ent = 0; 1684 lev = 0; 1685 old = 0; 1686 ret = 0; 1687 error_info.file = file; 1688 error_info.line = 0; 1689 first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0); 1690 while (p = sfgetr(fp, '\n', 1)) 1691 { 1692 error_info.line++; 1693 for (; isspace(*p); p++); 1694 1695 /* 1696 * nesting 1697 */ 1698 1699 switch (*p) 1700 { 1701 case 0: 1702 case '#': 1703 cont = '#'; 1704 continue; 1705 case '{': 1706 if (++lev 
< MAXNEST) 1707 ep->nest = *p; 1708 else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 1709 (*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST); 1710 continue; 1711 case '}': 1712 if (!last || lev <= 0) 1713 { 1714 if (mp->disc->errorf) 1715 (*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p); 1716 } 1717 else if (lev-- == ent) 1718 { 1719 ent = 0; 1720 ep->cont = ':'; 1721 ep->offset = ret->offset; 1722 ep->nest = ' '; 1723 ep->type = ' '; 1724 ep->op = ' '; 1725 ep->desc = "[RETURN]"; 1726 last = ep; 1727 ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); 1728 ret = 0; 1729 } 1730 else 1731 last->nest = *p; 1732 continue; 1733 default: 1734 if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|') 1735 { 1736 n = *p++; 1737 if (n >= 'a' && n <= 'z') 1738 n -= 'a'; 1739 else 1740 { 1741 if (mp->disc->errorf) 1742 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n); 1743 n = 0; 1744 } 1745 if (ret && mp->disc->errorf) 1746 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a'); 1747 if (*p == '{') 1748 { 1749 ent = ++lev; 1750 ret = ep; 1751 ep->desc = "[FUNCTION]"; 1752 } 1753 else 1754 { 1755 if (*(p + 1) != ')' && mp->disc->errorf) 1756 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a'); 1757 ep->desc = "[CALL]"; 1758 } 1759 ep->cont = cont; 1760 ep->offset = n; 1761 ep->nest = ' '; 1762 ep->type = ' '; 1763 ep->op = ' '; 1764 last = ep; 1765 ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); 1766 if (ret) 1767 fun[n] = last->value.lab = ep; 1768 else if (!(last->value.lab = fun[n]) && mp->disc->errorf) 1769 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a'); 1770 continue; 1771 } 1772 if (!ep->nest) 1773 ep->nest = (lev > 0 && lev != ent) ? 
('0' + lev - !!ent) : ' '; 1774 break; 1775 } 1776 1777 /* 1778 * continuation 1779 */ 1780 1781 cont = '$'; 1782 switch (*p) 1783 { 1784 case '>': 1785 old = 1; 1786 if (*(p + 1) == *p) 1787 { 1788 /* 1789 * old style nesting push 1790 */ 1791 1792 p++; 1793 old = 2; 1794 if (!lev && last) 1795 { 1796 lev = 1; 1797 last->nest = '{'; 1798 if (last->cont == '>') 1799 last->cont = '&'; 1800 ep->nest = '1'; 1801 } 1802 } 1803 /*FALLTHROUGH*/ 1804 case '+': 1805 case '&': 1806 case '|': 1807 ep->cont = *p++; 1808 break; 1809 default: 1810 if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf) 1811 (*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p); 1812 /*FALLTHROUGH*/ 1813 case '*': 1814 case '0': case '1': case '2': case '3': case '4': 1815 case '5': case '6': case '7': case '8': case '9': 1816 ep->cont = (lev > 0) ? '&' : '#'; 1817 break; 1818 } 1819 switch (old) 1820 { 1821 case 1: 1822 old = 0; 1823 if (lev) 1824 { 1825 /* 1826 * old style nesting pop 1827 */ 1828 1829 lev = 0; 1830 if (last) 1831 last->nest = '}'; 1832 ep->nest = ' '; 1833 if (ep->cont == '&') 1834 ep->cont = '#'; 1835 } 1836 break; 1837 case 2: 1838 old = 1; 1839 break; 1840 } 1841 if (isdigit(*p)) 1842 { 1843 /* 1844 * absolute offset 1845 */ 1846 1847 ep->offset = strton(p, &next, NiL, 0); 1848 p2 = next; 1849 } 1850 else 1851 { 1852 for (p2 = p; *p2 && !isspace(*p2); p2++); 1853 if (!*p2) 1854 { 1855 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 1856 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p); 1857 continue; 1858 } 1859 1860 /* 1861 * offset expression 1862 */ 1863 1864 *p2++ = 0; 1865 ep->expr = vmstrdup(mp->vm, p); 1866 if (isalpha(*p)) 1867 ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? 
ip->value : 0; 1868 else if (*p == '(' && ep->cont == '>') 1869 { 1870 /* 1871 * convert old style indirection to @ 1872 */ 1873 1874 p = ep->expr + 1; 1875 for (;;) 1876 { 1877 switch (*p++) 1878 { 1879 case 0: 1880 case '@': 1881 case '(': 1882 break; 1883 case ')': 1884 break; 1885 default: 1886 continue; 1887 } 1888 break; 1889 } 1890 if (*--p == ')') 1891 { 1892 *p = 0; 1893 *ep->expr = '@'; 1894 } 1895 } 1896 } 1897 for (; isspace(*p2); p2++); 1898 for (p = p2; *p2 && !isspace(*p2); p2++); 1899 if (!*p2) 1900 { 1901 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 1902 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p); 1903 continue; 1904 } 1905 *p2++ = 0; 1906 1907 /* 1908 * type 1909 */ 1910 1911 if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e') 1912 { 1913 ep->swap = ~(*p == 'l' ? 7 : 0); 1914 p += 2; 1915 } 1916 if (*p == 's') 1917 { 1918 if (*(p + 1) == 'h') 1919 ep->type = 'h'; 1920 else 1921 ep->type = 's'; 1922 } 1923 else if (*p == 'a') 1924 ep->type = 's'; 1925 else 1926 ep->type = *p; 1927 if (p = strchr(p, '&')) 1928 { 1929 /* 1930 * old style mask 1931 */ 1932 1933 ep->mask = strton(++p, NiL, NiL, 0); 1934 } 1935 for (; isspace(*p2); p2++); 1936 if (ep->mask) 1937 *--p2 = '='; 1938 1939 /* 1940 * comparison operation 1941 */ 1942 1943 p = p2; 1944 if (p2 = strchr(p, '\t')) 1945 *p2++ = 0; 1946 else 1947 { 1948 int qe = 0; 1949 int qn = 0; 1950 1951 /* 1952 * assume balanced {}[]()\\""'' field 1953 */ 1954 1955 for (p2 = p;;) 1956 { 1957 switch (n = *p2++) 1958 { 1959 case 0: 1960 break; 1961 case '{': 1962 if (!qe) 1963 qe = '}'; 1964 if (qe == '}') 1965 qn++; 1966 continue; 1967 case '(': 1968 if (!qe) 1969 qe = ')'; 1970 if (qe == ')') 1971 qn++; 1972 continue; 1973 case '[': 1974 if (!qe) 1975 qe = ']'; 1976 if (qe == ']') 1977 qn++; 1978 continue; 1979 case '}': 1980 case ')': 1981 case ']': 1982 if (qe == n && qn > 0) 1983 qn--; 1984 continue; 1985 case '"': 1986 case '\'': 1987 if (!qe) 1988 qe = n; 1989 else if (qe == 
n) 1990 qe = 0; 1991 continue; 1992 case '\\': 1993 if (*p2) 1994 p2++; 1995 continue; 1996 default: 1997 if (!qe && isspace(n)) 1998 break; 1999 continue; 2000 } 2001 if (n) 2002 *(p2 - 1) = 0; 2003 else 2004 p2--; 2005 break; 2006 } 2007 } 2008 lge = 0; 2009 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 2010 ep->op = '='; 2011 else 2012 { 2013 if (*p == '&') 2014 { 2015 ep->mask = strton(++p, &next, NiL, 0); 2016 p = next; 2017 } 2018 switch (*p) 2019 { 2020 case '=': 2021 case '>': 2022 case '<': 2023 case '*': 2024 ep->op = *p++; 2025 if (*p == '=') 2026 { 2027 p++; 2028 switch (ep->op) 2029 { 2030 case '>': 2031 lge = -1; 2032 break; 2033 case '<': 2034 lge = 1; 2035 break; 2036 } 2037 } 2038 break; 2039 case '!': 2040 case '@': 2041 ep->op = *p++; 2042 if (*p == '=') 2043 p++; 2044 break; 2045 case 'x': 2046 p++; 2047 ep->op = '*'; 2048 break; 2049 default: 2050 ep->op = '='; 2051 if (ep->mask) 2052 ep->value.num = ep->mask; 2053 break; 2054 } 2055 } 2056 if (ep->op != '*' && !ep->value.num) 2057 { 2058 if (ep->type == 'e') 2059 { 2060 if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0)) 2061 { 2062 ep->value.sub->re_disc = &mp->redisc; 2063 if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE))) 2064 { 2065 p += ep->value.sub->re_npat; 2066 if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0))) 2067 p += ep->value.sub->re_npat; 2068 } 2069 if (n) 2070 { 2071 regmessage(mp, ep->value.sub, n); 2072 ep->value.sub = 0; 2073 } 2074 else if (*p && mp->disc->errorf) 2075 (*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p); 2076 } 2077 } 2078 else if (ep->type == 'm') 2079 { 2080 ep->mask = stresc(p) + 1; 2081 ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0); 2082 memcpy(ep->value.str, p, ep->mask); 2083 if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)")) 2084 ep->value.str[ep->mask - 1] = '*'; 2085 } 2086 else if (ep->type == 's') 2087 { 2088 ep->mask = 
stresc(p); 2089 ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0); 2090 memcpy(ep->value.str, p, ep->mask); 2091 } 2092 else if (*p == '\'') 2093 { 2094 stresc(p); 2095 ep->value.num = *(unsigned char*)(p + 1) + lge; 2096 } 2097 else if (strmatch(p, "+([a-z])\\(*\\)")) 2098 { 2099 char* t; 2100 2101 t = p; 2102 ep->type = 'V'; 2103 ep->op = *p; 2104 while (*p && *p++ != '('); 2105 switch (ep->op) 2106 { 2107 case 'l': 2108 n = *p++; 2109 if (n < 'a' || n > 'z') 2110 { 2111 if (mp->disc->errorf) 2112 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n); 2113 } 2114 else if (!fun[n -= 'a']) 2115 { 2116 if (mp->disc->errorf) 2117 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a'); 2118 } 2119 else 2120 { 2121 ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0); 2122 ep->value.loop->lab = fun[n]; 2123 while (*p && *p++ != ','); 2124 ep->value.loop->start = strton(p, &t, NiL, 0); 2125 while (*t && *t++ != ','); 2126 ep->value.loop->size = strton(t, &t, NiL, 0); 2127 } 2128 break; 2129 case 'm': 2130 case 'r': 2131 ep->desc = vmnewof(mp->vm, 0, char, 32, 0); 2132 ep->mime = vmnewof(mp->vm, 0, char, 32, 0); 2133 break; 2134 case 'v': 2135 break; 2136 default: 2137 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 2138 (*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t); 2139 break; 2140 } 2141 } 2142 else 2143 { 2144 ep->value.num = strton(p, NiL, NiL, 0) + lge; 2145 if (ep->op == '@') 2146 ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num)); 2147 } 2148 } 2149 2150 /* 2151 * file description 2152 */ 2153 2154 if (p2) 2155 { 2156 for (; isspace(*p2); p2++); 2157 if (p = strchr(p2, '\t')) 2158 { 2159 /* 2160 * check for message catalog index 2161 */ 2162 2163 *p++ = 0; 2164 if (isalpha(*p2)) 2165 { 2166 for (p3 = p2; isalnum(*p3); p3++); 2167 if (*p3++ == ':') 2168 { 2169 for (; isdigit(*p3); p3++); 2170 if (!*p3) 2171 { 2172 for (p2 = p; isspace(*p2); p2++); 2173 if (p = strchr(p2, 
'\t')) 2174 *p++ = 0; 2175 } 2176 } 2177 } 2178 } 2179 stresc(p2); 2180 ep->desc = vmstrdup(mp->vm, p2); 2181 if (p) 2182 { 2183 for (; isspace(*p); p++); 2184 if (*p) 2185 ep->mime = vmstrdup(mp->vm, p); 2186 } 2187 } 2188 else 2189 ep->desc = ""; 2190 2191 /* 2192 * get next entry 2193 */ 2194 2195 last = ep; 2196 ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); 2197 } 2198 if (last) 2199 { 2200 last->next = 0; 2201 if (mp->magiclast) 2202 mp->magiclast->next = first; 2203 else 2204 mp->magic = first; 2205 mp->magiclast = last; 2206 } 2207 vmfree(mp->vm, ep); 2208 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 2209 { 2210 if (lev < 0) 2211 (*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators"); 2212 else if (lev > 0) 2213 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators"); 2214 if (ret) 2215 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a'); 2216 } 2217 error_info.file = 0; 2218 error_info.line = 0; 2219 return 0; 2220} 2221 2222/* 2223 * load a magic file into mp 2224 */ 2225 2226int 2227magicload(register Magic_t* mp, const char* file, unsigned long flags) 2228{ 2229 register char* s; 2230 register char* e; 2231 register char* t; 2232 int n; 2233 int found; 2234 int list; 2235 Sfio_t* fp; 2236 2237 mp->flags = mp->disc->flags | flags; 2238 found = 0; 2239 if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1)) 2240 { 2241 if (!(s = getenv(MAGIC_FILE_ENV)) || !*s) 2242 s = MAGIC_FILE; 2243 } 2244 for (;;) 2245 { 2246 if (!list) 2247 e = 0; 2248 else if (e = strchr(s, ':')) 2249 { 2250 /* 2251 * ok, so ~ won't work for the last list element 2252 * we do it for MAGIC_FILES_ENV anyway 2253 */ 2254 2255 if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME"))) 2256 { 2257 sfputr(mp->tmp, t, -1); 2258 s += n - 1; 2259 } 2260 sfwrite(mp->tmp, s, e - s); 2261 if (!(s = sfstruse(mp->tmp))) 2262 goto nospace; 2263 } 2264 if (!*s 
|| streq(s, "-")) 2265 s = MAGIC_FILE; 2266 if (!(fp = sfopen(NiL, s, "r"))) 2267 { 2268 if (list) 2269 { 2270 if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))) && !strchr(s, '/')) 2271 { 2272 strcpy(mp->fbuf, s); 2273 sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf); 2274 if (!(s = sfstruse(mp->tmp))) 2275 goto nospace; 2276 if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf)))) 2277 goto next; 2278 } 2279 if (!(fp = sfopen(NiL, t, "r"))) 2280 goto next; 2281 } 2282 else 2283 { 2284 if (mp->disc->errorf) 2285 (*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s); 2286 return -1; 2287 } 2288 } 2289 found = 1; 2290 n = load(mp, s, fp); 2291 sfclose(fp); 2292 if (n && !list) 2293 return -1; 2294 next: 2295 if (!e) 2296 break; 2297 s = e + 1; 2298 } 2299 if (!found) 2300 { 2301 if (mp->flags & MAGIC_VERBOSE) 2302 { 2303 if (mp->disc->errorf) 2304 (*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file"); 2305 } 2306 return -1; 2307 } 2308 return 0; 2309 nospace: 2310 if (mp->disc->errorf) 2311 (*mp->disc->errorf)(mp, mp->disc, 3, "out of space"); 2312 return -1; 2313} 2314 2315/* 2316 * open a magic session 2317 */ 2318 2319Magic_t* 2320magicopen(Magicdisc_t* disc) 2321{ 2322 register Magic_t* mp; 2323 register int i; 2324 register int n; 2325 register int f; 2326 register int c; 2327 register Vmalloc_t* vm; 2328 unsigned char* map[CC_MAPS + 1]; 2329 2330 if (!(vm = vmopen(Vmdcheap, Vmbest, 0))) 2331 return 0; 2332 if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0))) 2333 { 2334 vmclose(vm); 2335 return 0; 2336 } 2337 mp->id = lib; 2338 mp->disc = disc; 2339 mp->vm = vm; 2340 mp->flags = disc->flags; 2341 mp->redisc.re_version = REG_VERSION; 2342 mp->redisc.re_flags = REG_NOFREE; 2343 mp->redisc.re_errorf = (regerror_t)disc->errorf; 2344 mp->redisc.re_resizef = (regresize_t)vmgetmem; 2345 mp->redisc.re_resizehandle = (void*)mp->vm; 2346 mp->dtdisc.key = offsetof(Info_t, name); 2347 mp->dtdisc.link = 
offsetof(Info_t, link); 2348 if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dthash))) 2349 goto bad; 2350 for (n = 0; n < elementsof(info); n++) 2351 dtinsert(mp->infotab, &info[n]); 2352 for (i = 0; i < CC_MAPS; i++) 2353 map[i] = ccmap(i, CC_ASCII); 2354 mp->x2n = ccmap(CC_ALIEN, CC_NATIVE); 2355 for (n = 0; n <= UCHAR_MAX; n++) 2356 { 2357 f = 0; 2358 i = CC_MAPS; 2359 while (--i >= 0) 2360 { 2361 c = ccmapchr(map[i], n); 2362 f = (f << CC_BIT) | CCTYPE(c); 2363 } 2364 mp->cctype[n] = f; 2365 } 2366 return mp; 2367 bad: 2368 magicclose(mp); 2369 return 0; 2370} 2371 2372/* 2373 * close a magicopen() session 2374 */ 2375 2376int 2377magicclose(register Magic_t* mp) 2378{ 2379 if (!mp) 2380 return -1; 2381 if (mp->tmp) 2382 sfstrclose(mp->tmp); 2383 if (mp->vm) 2384 vmclose(mp->vm); 2385 return 0; 2386} 2387 2388/* 2389 * return the magic string for file with optional stat info st 2390 */ 2391 2392char* 2393magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st) 2394{ 2395 off_t off; 2396 char* s; 2397 2398 mp->flags = mp->disc->flags; 2399 mp->mime = 0; 2400 if (!st) 2401 s = T("cannot stat"); 2402 else 2403 { 2404 if (mp->fp = fp) 2405 off = sfseek(mp->fp, (off_t)0, SEEK_CUR); 2406 s = type(mp, file, st, mp->tbuf, &mp->tbuf[sizeof(mp->tbuf)-1]); 2407 if (mp->fp) 2408 sfseek(mp->fp, off, SEEK_SET); 2409 if (!(mp->flags & (MAGIC_MIME|MAGIC_ALL))) 2410 { 2411 if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128)) 2412 sfprintf(mp->tmp, "%s ", T("short")); 2413 sfprintf(mp->tmp, "%s", s); 2414 if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))) 2415 sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? 
T("searchable") : T("executable")); 2416 if (st->st_mode & S_ISUID) 2417 sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid)); 2418 if (st->st_mode & S_ISGID) 2419 sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid)); 2420 if (st->st_mode & S_ISVTX) 2421 sfprintf(mp->tmp, ", sticky"); 2422 if (!(s = sfstruse(mp->tmp))) 2423 s = T("out of space"); 2424 } 2425 } 2426 if (mp->flags & MAGIC_MIME) 2427 s = mp->mime; 2428 if (!s) 2429 s = T("error"); 2430 return s; 2431} 2432 2433/* 2434 * list the magic table in mp on sp 2435 */ 2436 2437int 2438magiclist(register Magic_t* mp, register Sfio_t* sp) 2439{ 2440 register Entry_t* ep = mp->magic; 2441 register Entry_t* rp = 0; 2442 2443 mp->flags = mp->disc->flags; 2444 sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n"); 2445 while (ep) 2446 { 2447 sfprintf(sp, "%c %c\t", ep->cont, ep->nest); 2448 if (ep->expr) 2449 sfprintf(sp, "%s", ep->expr); 2450 else 2451 sfprintf(sp, "%ld", ep->offset); 2452 sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask); 2453 switch (ep->type) 2454 { 2455 case 'm': 2456 case 's': 2457 sfputr(sp, fmtesc(ep->value.str), -1); 2458 break; 2459 case 'V': 2460 switch (ep->op) 2461 { 2462 case 'l': 2463 sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset); 2464 break; 2465 case 'v': 2466 sfprintf(sp, "vcodex()"); 2467 break; 2468 default: 2469 sfprintf(sp, "%p", ep->value.str); 2470 break; 2471 } 2472 break; 2473 default: 2474 sfprintf(sp, "%lo", ep->value.num); 2475 break; 2476 } 2477 sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc)); 2478 if (ep->cont == '$' && !ep->value.lab->mask) 2479 { 2480 rp = ep; 2481 ep = ep->value.lab; 2482 } 2483 else 2484 { 2485 if (ep->cont == ':') 2486 { 2487 ep = rp; 2488 ep->value.lab->mask = 1; 2489 } 2490 ep = ep->next; 2491 } 2492 } 2493 return 0; 2494} 2495