1/*********************************************************************** 2* * 3* This software is part of the ast package * 4* Copyright (c) 1985-2011 AT&T Intellectual Property * 5* and is licensed under the * 6* Eclipse Public License, Version 1.0 * 7* by AT&T Intellectual Property * 8* * 9* A copy of the License is available at * 10* http://www.eclipse.org/org/documents/epl-v10.html * 11* (with md5 checksum b35adb5213ca9657e911e9befb180842) * 12* * 13* Information and Software Systems Research * 14* AT&T Research * 15* Florham Park NJ * 16* * 17* Glenn Fowler <gsf@research.att.com> * 18* David Korn <dgk@research.att.com> * 19* Phong Vo <kpv@research.att.com> * 20* * 21***********************************************************************/ 22#pragma prototyped 23/* 24 * Glenn Fowler 25 * AT&T Research 26 * 27 * library interface to file 28 * 29 * the sum of the hacks {s5,v10,planix} is _____ than the parts 30 */ 31 32static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2011-03-09 $\0\n"; 33 34static const char lib[] = "libast:magic"; 35 36#include <ast.h> 37#include <ctype.h> 38#include <ccode.h> 39#include <dt.h> 40#include <modex.h> 41#include <error.h> 42#include <regex.h> 43#include <swap.h> 44 45#define T(m) (*m?ERROR_translate(NiL,NiL,lib,m):m) 46 47#define match(s,p) strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE) 48 49#define MAXNEST 10 /* { ... 
} nesting limit */ 50#define MINITEM 4 /* magic buffer rounding */ 51 52typedef struct /* identifier dictionary entry */ 53{ 54 const char name[16]; /* identifier name */ 55 int value; /* identifier value */ 56 Dtlink_t link; /* dictionary link */ 57} Info_t; 58 59typedef struct Edit /* edit substitution */ 60{ 61 struct Edit* next; /* next in list */ 62 regex_t* from; /* from pattern */ 63} Edit_t; 64 65struct Entry; 66 67typedef struct /* loop info */ 68{ 69 struct Entry* lab; /* call this function */ 70 int start; /* start here */ 71 int size; /* increment by this amount */ 72 int count; /* dynamic loop count */ 73 int offset; /* dynamic offset */ 74} Loop_t; 75 76typedef struct Entry /* magic file entry */ 77{ 78 struct Entry* next; /* next in list */ 79 char* expr; /* offset expression */ 80 union 81 { 82 unsigned long num; 83 char* str; 84 struct Entry* lab; 85 regex_t* sub; 86 Loop_t* loop; 87 } value; /* comparison value */ 88 char* desc; /* file description */ 89 char* mime; /* file mime type */ 90 unsigned long offset; /* offset in bytes */ 91 unsigned long mask; /* mask before compare */ 92 char cont; /* continuation operation */ 93 char type; /* datum type */ 94 char op; /* comparison operation */ 95 char nest; /* { or } nesting operation */ 96 char swap; /* forced swap order */ 97} Entry_t; 98 99#define CC_BIT 5 100 101#if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2) 102typedef unsigned short Cctype_t; 103#else 104typedef unsigned long Cctype_t; 105#endif 106 107#define CC_text 0x01 108#define CC_control 0x02 109#define CC_latin 0x04 110#define CC_binary 0x08 111#define CC_utf_8 0x10 112 113#define CC_notext CC_text /* CC_text is flipped before checking */ 114 115#define CC_MASK (CC_binary|CC_latin|CC_control|CC_text) 116 117#define CCTYPE(c) (((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text) 118 119#define ID_NONE 0 120#define ID_ASM 1 121#define ID_C 2 122#define ID_COBOL 3 123#define 
ID_COPYBOOK 4 124#define ID_CPLUSPLUS 5 125#define ID_FORTRAN 6 126#define ID_HTML 7 127#define ID_INCL1 8 128#define ID_INCL2 9 129#define ID_INCL3 10 130#define ID_MAM1 11 131#define ID_MAM2 12 132#define ID_MAM3 13 133#define ID_NOTEXT 14 134#define ID_PL1 15 135#define ID_YACC 16 136 137#define ID_MAX ID_YACC 138 139#define INFO_atime 1 140#define INFO_blocks 2 141#define INFO_ctime 3 142#define INFO_fstype 4 143#define INFO_gid 5 144#define INFO_mode 6 145#define INFO_mtime 7 146#define INFO_name 8 147#define INFO_nlink 9 148#define INFO_size 10 149#define INFO_uid 11 150 151#define _MAGIC_PRIVATE_ \ 152 Magicdisc_t* disc; /* discipline */ \ 153 Vmalloc_t* vm; /* vmalloc region */ \ 154 Entry_t* magic; /* parsed magic table */ \ 155 Entry_t* magiclast; /* last entry in magic */ \ 156 char* mime; /* MIME type */ \ 157 unsigned char* x2n; /* CC_ALIEN=>CC_NATIVE */ \ 158 char fbuf[SF_BUFSIZE + 1]; /* file data */ \ 159 char xbuf[SF_BUFSIZE + 1]; /* indirect file data */ \ 160 char nbuf[256]; /* !CC_NATIVE data */ \ 161 char mbuf[64]; /* mime string */ \ 162 char sbuf[64]; /* type suffix string */ \ 163 char tbuf[2 * PATH_MAX]; /* type string */ \ 164 Cctype_t cctype[UCHAR_MAX + 1]; /* char code types */ \ 165 unsigned int count[UCHAR_MAX + 1]; /* char frequency count */ \ 166 unsigned int multi[UCHAR_MAX + 1]; /* muti char count */ \ 167 int keep[MAXNEST]; /* ckmagic nest stack */ \ 168 char* cap[MAXNEST]; /* ckmagic mime stack */ \ 169 char* msg[MAXNEST]; /* ckmagic text stack */ \ 170 Entry_t* ret[MAXNEST]; /* ckmagic return stack */ \ 171 int fbsz; /* fbuf size */ \ 172 int fbmx; /* fbuf max size */ \ 173 int xbsz; /* xbuf size */ \ 174 int swap; /* swap() operation */ \ 175 unsigned long flags; /* disc+open flags */ \ 176 long xoff; /* xbuf offset */ \ 177 int identifier[ID_MAX + 1]; /* Info_t identifier */ \ 178 Sfio_t* fp; /* fbuf fp */ \ 179 Sfio_t* tmp; /* tmp string */ \ 180 regdisc_t redisc; /* regex discipline */ \ 181 Dtdisc_t dtdisc; /* dict 
discipline */ \ 182 Dt_t* idtab; /* identifier dict */ \ 183 Dt_t* infotab; /* info keyword dict */ 184 185#include <magic.h> 186 187static Info_t dict[] = /* keyword dictionary */ 188{ 189 { "COMMON", ID_FORTRAN }, 190 { "COMPUTE", ID_COBOL }, 191 { "COMP", ID_COPYBOOK }, 192 { "COMPUTATIONAL",ID_COPYBOOK }, 193 { "DCL", ID_PL1 }, 194 { "DEFINED", ID_PL1 }, 195 { "DIMENSION", ID_FORTRAN }, 196 { "DIVISION", ID_COBOL }, 197 { "FILLER", ID_COPYBOOK }, 198 { "FIXED", ID_PL1 }, 199 { "FUNCTION", ID_FORTRAN }, 200 { "HTML", ID_HTML }, 201 { "INTEGER", ID_FORTRAN }, 202 { "MAIN", ID_PL1 }, 203 { "OPTIONS", ID_PL1 }, 204 { "PERFORM", ID_COBOL }, 205 { "PIC", ID_COPYBOOK }, 206 { "REAL", ID_FORTRAN }, 207 { "REDEFINES", ID_COPYBOOK }, 208 { "S9", ID_COPYBOOK }, 209 { "SECTION", ID_COBOL }, 210 { "SELECT", ID_COBOL }, 211 { "SUBROUTINE", ID_FORTRAN }, 212 { "TEXT", ID_ASM }, 213 { "VALUE", ID_COPYBOOK }, 214 { "attr", ID_MAM3 }, 215 { "binary", ID_YACC }, 216 { "block", ID_FORTRAN }, 217 { "bss", ID_ASM }, 218 { "byte", ID_ASM }, 219 { "char", ID_C }, 220 { "class", ID_CPLUSPLUS }, 221 { "clr", ID_NOTEXT }, 222 { "comm", ID_ASM }, 223 { "common", ID_FORTRAN }, 224 { "data", ID_ASM }, 225 { "dimension", ID_FORTRAN }, 226 { "done", ID_MAM2 }, 227 { "double", ID_C }, 228 { "even", ID_ASM }, 229 { "exec", ID_MAM3 }, 230 { "extern", ID_C }, 231 { "float", ID_C }, 232 { "function", ID_FORTRAN }, 233 { "globl", ID_ASM }, 234 { "h", ID_INCL3 }, 235 { "html", ID_HTML }, 236 { "include", ID_INCL1 }, 237 { "int", ID_C }, 238 { "integer", ID_FORTRAN }, 239 { "jmp", ID_NOTEXT }, 240 { "left", ID_YACC }, 241 { "libc", ID_INCL2 }, 242 { "long", ID_C }, 243 { "make", ID_MAM1 }, 244 { "mov", ID_NOTEXT }, 245 { "private", ID_CPLUSPLUS }, 246 { "public", ID_CPLUSPLUS }, 247 { "real", ID_FORTRAN }, 248 { "register", ID_C }, 249 { "right", ID_YACC }, 250 { "sfio", ID_INCL2 }, 251 { "static", ID_C }, 252 { "stdio", ID_INCL2 }, 253 { "struct", ID_C }, 254 { "subroutine", ID_FORTRAN }, 255 { 
"sys", ID_NOTEXT }, 256 { "term", ID_YACC }, 257 { "text", ID_ASM }, 258 { "tst", ID_NOTEXT }, 259 { "type", ID_YACC }, 260 { "typedef", ID_C }, 261 { "u", ID_INCL2 }, 262 { "union", ID_YACC }, 263 { "void", ID_C }, 264}; 265 266static Info_t info[] = 267{ 268 { "atime", INFO_atime }, 269 { "blocks", INFO_blocks }, 270 { "ctime", INFO_ctime }, 271 { "fstype", INFO_fstype }, 272 { "gid", INFO_gid }, 273 { "mode", INFO_mode }, 274 { "mtime", INFO_mtime }, 275 { "name", INFO_name }, 276 { "nlink", INFO_nlink }, 277 { "size", INFO_size }, 278 { "uid", INFO_uid }, 279}; 280 281/* 282 * return pointer to data at offset off and size siz 283 */ 284 285static char* 286getdata(register Magic_t* mp, register long off, register int siz) 287{ 288 register long n; 289 290 if (off < 0) 291 return 0; 292 if (off + siz <= mp->fbsz) 293 return mp->fbuf + off; 294 if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz) 295 { 296 if (off + siz > mp->fbmx) 297 return 0; 298 n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2); 299 if (sfseek(mp->fp, n, SEEK_SET) != n) 300 return 0; 301 if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0) 302 { 303 mp->xoff = 0; 304 mp->xbsz = 0; 305 return 0; 306 } 307 mp->xbuf[mp->xbsz] = 0; 308 mp->xoff = n; 309 if (off + siz > mp->xoff + mp->xbsz) 310 return 0; 311 } 312 return mp->xbuf + off - mp->xoff; 313} 314 315/* 316 * @... evaluator for strexpr() 317 */ 318 319static long 320indirect(const char* cs, char** e, void* handle) 321{ 322 register char* s = (char*)cs; 323 register Magic_t* mp = (Magic_t*)handle; 324 register long n = 0; 325 register char* p; 326 327 if (s) 328 { 329 if (*s == '@') 330 { 331 n = *++s == '(' ? 
strexpr(s, e, indirect, mp) : strtol(s, e, 0); 332 switch (*(s = *e)) 333 { 334 case 'b': 335 case 'B': 336 s++; 337 if (p = getdata(mp, n, 1)) 338 n = *(unsigned char*)p; 339 else 340 s = (char*)cs; 341 break; 342 case 'h': 343 case 'H': 344 s++; 345 if (p = getdata(mp, n, 2)) 346 n = swapget(mp->swap, p, 2); 347 else 348 s = (char*)cs; 349 break; 350 case 'q': 351 case 'Q': 352 s++; 353 if (p = getdata(mp, n, 8)) 354 n = swapget(mp->swap, p, 8); 355 else 356 s = (char*)cs; 357 break; 358 default: 359 if (isalnum(*s)) 360 s++; 361 if (p = getdata(mp, n, 4)) 362 n = swapget(mp->swap, p, 4); 363 else 364 s = (char*)cs; 365 break; 366 } 367 } 368 *e = s; 369 } 370 else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 371 (*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e); 372 return n; 373} 374 375/* 376 * emit regex error message 377 */ 378 379static void 380regmessage(Magic_t* mp, regex_t* re, int code) 381{ 382 char buf[128]; 383 384 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 385 { 386 regerror(code, re, buf, sizeof(buf)); 387 (*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf); 388 } 389} 390 391/* 392 * decompose vcodex(3) method composition 393 */ 394 395static char* 396vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x) 397{ 398 unsigned char* map; 399 const char* o; 400 int c; 401 int n; 402 int i; 403 int a; 404 405 map = CCMAP(CC_ASCII, CC_NATIVE); 406 a = 0; 407 i = 1; 408 for (;;) 409 { 410 if (i) 411 i = 0; 412 else 413 *b++ = '^'; 414 if (m < (x - 1) && !*(m + 1)) 415 { 416 /* 417 * obsolete indices 418 */ 419 420 if (!a) 421 { 422 a = 1; 423 o = "old, "; 424 while (b < e && (c = *o++)) 425 *b++ = c; 426 } 427 switch (*m) 428 { 429 case 0: o = "delta"; break; 430 case 1: o = "huffman"; break; 431 case 2: o = "huffgroup"; break; 432 case 3: o = "arith"; break; 433 case 4: o = "bwt"; break; 434 case 5: o = "rle"; break; 435 case 6: o = "mtf"; break; 436 case 7: o = "transpose"; break; 437 case 8: o = "table"; 
break; 438 case 9: o = "huffpart"; break; 439 case 50: o = "map"; break; 440 case 100: o = "recfm"; break; 441 case 101: o = "ss7"; break; 442 default: o = "UNKNOWN"; break; 443 } 444 m += 2; 445 while (b < e && (c = *o++)) 446 *b++ = c; 447 } 448 else 449 while (b < e && m < x && (c = *m++)) 450 { 451 if (map) 452 c = map[c]; 453 *b++ = c; 454 } 455 if (b >= e) 456 break; 457 n = 0; 458 while (m < x) 459 { 460 n = (n<<7) | (*m & 0x7f); 461 if (!(*m++ & 0x80)) 462 break; 463 } 464 if (n >= (x - m)) 465 break; 466 m += n; 467 } 468 return b; 469} 470 471/* 472 * check for magic table match in buf 473 */ 474 475static char* 476ckmagic(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st, unsigned long off) 477{ 478 register Entry_t* ep; 479 register char* p; 480 register char* b; 481 register int level = 0; 482 int call = -1; 483 int all = 0; 484 int c; 485 int str; 486 char* q; 487 char* t; 488 char* cur; 489 char* base = 0; 490 unsigned long num; 491 unsigned long mask; 492 regmatch_t matches[10]; 493 494 mp->swap = 0; 495 b = mp->msg[0] = cur = buf; 496 mp->mime = mp->cap[0] = 0; 497 mp->keep[0] = 0; 498 for (ep = mp->magic; ep; ep = ep->next) 499 { 500 fun: 501 if (ep->nest == '{') 502 { 503 if (++level >= MAXNEST) 504 { 505 call = -1; 506 level = 0; 507 mp->keep[0] = 0; 508 b = mp->msg[0]; 509 mp->mime = mp->cap[0]; 510 continue; 511 } 512 mp->keep[level] = mp->keep[level - 1] != 0; 513 mp->msg[level] = b; 514 mp->cap[level] = mp->mime; 515 } 516 switch (ep->cont) 517 { 518 case '#': 519 if (mp->keep[level] && b > cur) 520 { 521 if ((mp->flags & MAGIC_ALL) && b < (end - 3)) 522 { 523 all = 1; 524 *b++ = '\n'; 525 cur = b; 526 continue; 527 } 528 *b = 0; 529 return buf; 530 } 531 mp->swap = 0; 532 b = mp->msg[0] = cur; 533 mp->mime = mp->cap[0] = 0; 534 if (ep->type == ' ') 535 continue; 536 break; 537 case '$': 538 if (mp->keep[level] && call < (MAXNEST - 1)) 539 { 540 mp->ret[++call] = ep; 541 ep = ep->value.lab; 542 goto fun; 543 } 544 
continue; 545 case ':': 546 ep = mp->ret[call--]; 547 if (ep->op == 'l') 548 goto fun; 549 continue; 550 case '|': 551 if (mp->keep[level] > 1) 552 goto checknest; 553 /*FALLTHROUGH*/ 554 default: 555 if (!mp->keep[level]) 556 { 557 b = mp->msg[level]; 558 mp->mime = mp->cap[level]; 559 goto checknest; 560 } 561 break; 562 } 563 p = ""; 564 num = 0; 565 if (!ep->expr) 566 num = ep->offset + off; 567 else 568 switch (ep->offset) 569 { 570 case 0: 571 num = strexpr(ep->expr, NiL, indirect, mp) + off; 572 break; 573 case INFO_atime: 574 num = st->st_atime; 575 ep->type = 'D'; 576 break; 577 case INFO_blocks: 578 num = iblocks(st); 579 ep->type = 'N'; 580 break; 581 case INFO_ctime: 582 num = st->st_ctime; 583 ep->type = 'D'; 584 break; 585 case INFO_fstype: 586 p = fmtfs(st); 587 ep->type = toupper(ep->type); 588 break; 589 case INFO_gid: 590 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 591 { 592 p = fmtgid(st->st_gid); 593 ep->type = toupper(ep->type); 594 } 595 else 596 { 597 num = st->st_gid; 598 ep->type = 'N'; 599 } 600 break; 601 case INFO_mode: 602 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 603 { 604 p = fmtmode(st->st_mode, 0); 605 ep->type = toupper(ep->type); 606 } 607 else 608 { 609 num = modex(st->st_mode); 610 ep->type = 'N'; 611 } 612 break; 613 case INFO_mtime: 614 num = st->st_ctime; 615 ep->type = 'D'; 616 break; 617 case INFO_name: 618 if (!base) 619 { 620 if (base = strrchr(file, '/')) 621 base++; 622 else 623 base = (char*)file; 624 } 625 p = base; 626 ep->type = toupper(ep->type); 627 break; 628 case INFO_nlink: 629 num = st->st_nlink; 630 ep->type = 'N'; 631 break; 632 case INFO_size: 633 num = st->st_size; 634 ep->type = 'N'; 635 break; 636 case INFO_uid: 637 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 638 { 639 p = fmtuid(st->st_uid); 640 ep->type = toupper(ep->type); 641 } 642 else 643 { 644 num = st->st_uid; 645 ep->type = 'N'; 646 } 647 break; 648 } 649 switch (ep->type) 650 { 651 652 case 'b': 
653 if (!(p = getdata(mp, num, 1))) 654 goto next; 655 num = *(unsigned char*)p; 656 break; 657 658 case 'h': 659 if (!(p = getdata(mp, num, 2))) 660 goto next; 661 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2); 662 break; 663 664 case 'd': 665 case 'l': 666 case 'v': 667 if (!(p = getdata(mp, num, 4))) 668 goto next; 669 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4); 670 break; 671 672 case 'q': 673 if (!(p = getdata(mp, num, 8))) 674 goto next; 675 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8); 676 break; 677 678 case 'e': 679 if (!(p = getdata(mp, num, 0))) 680 goto next; 681 /*FALLTHROUGH*/ 682 case 'E': 683 if (!ep->value.sub) 684 goto next; 685 if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches))) 686 { 687 c = mp->fbsz; 688 if (c >= sizeof(mp->nbuf)) 689 c = sizeof(mp->nbuf) - 1; 690 p = (char*)memcpy(mp->nbuf, p, c); 691 p[c] = 0; 692 ccmapstr(mp->x2n, p, c); 693 if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches))) 694 { 695 if (c != REG_NOMATCH) 696 regmessage(mp, ep->value.sub, c); 697 goto next; 698 } 699 } 700 p = ep->value.sub->re_sub->re_buf; 701 q = T(ep->desc); 702 t = *q ? q : p; 703 if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b') 704 *b++ = ' '; 705 b += sfsprintf(b, end - b, *q ? q : "%s", p + (*p == '\b')); 706 if (ep->mime) 707 mp->mime = ep->mime; 708 goto checknest; 709 710 case 's': 711 if (!(p = getdata(mp, num, ep->mask))) 712 goto next; 713 goto checkstr; 714 case 'm': 715 if (!(p = getdata(mp, num, 0))) 716 goto next; 717 /*FALLTHROUGH*/ 718 case 'M': 719 case 'S': 720 checkstr: 721 for (;;) 722 { 723 if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p)) 724 break; 725 if ((ep->type == 'm' || ep->type == 'M') ? 
strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask)) 726 break; 727 if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf)) 728 goto next; 729 p = (char*)memcpy(mp->nbuf, p, ep->mask); 730 p[ep->mask] = 0; 731 ccmapstr(mp->x2n, p, ep->mask); 732 } 733 q = T(ep->desc); 734 if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b') 735 *b++ = ' '; 736 for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++); 737 *t = 0; 738 b += sfsprintf(b, end - b, q + (*q == '\b'), p); 739 *t = c; 740 if (ep->mime) 741 mp->mime = ep->mime; 742 goto checknest; 743 744 } 745 if (mask = ep->mask) 746 num &= mask; 747 switch (ep->op) 748 { 749 750 case '=': 751 case '@': 752 if (num == ep->value.num) 753 break; 754 if (ep->cont != '#') 755 goto next; 756 if (!mask) 757 mask = ~mask; 758 if (ep->type == 'h') 759 { 760 if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num) 761 { 762 if (!(mp->swap & (mp->swap + 1))) 763 mp->swap = 7; 764 goto swapped; 765 } 766 } 767 else if (ep->type == 'l') 768 { 769 for (c = 1; c < 4; c++) 770 if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num) 771 { 772 if (!(mp->swap & (mp->swap + 1))) 773 mp->swap = 7; 774 goto swapped; 775 } 776 } 777 else if (ep->type == 'q') 778 { 779 for (c = 1; c < 8; c++) 780 if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num) 781 goto swapped; 782 } 783 goto next; 784 785 case '!': 786 if (num != ep->value.num) 787 break; 788 goto next; 789 790 case '^': 791 if (num ^ ep->value.num) 792 break; 793 goto next; 794 795 case '>': 796 if (num > ep->value.num) 797 break; 798 goto next; 799 800 case '<': 801 if (num < ep->value.num) 802 break; 803 goto next; 804 805 case 'l': 806 if (num > 0 && mp->keep[level] && call < (MAXNEST - 1)) 807 { 808 if (!ep->value.loop->count) 809 { 810 ep->value.loop->count = num; 811 ep->value.loop->offset = off; 812 off = ep->value.loop->start; 813 } 814 else if (!--ep->value.loop->count) 
815 { 816 off = ep->value.loop->offset; 817 goto next; 818 } 819 else 820 off += ep->value.loop->size; 821 mp->ret[++call] = ep; 822 ep = ep->value.loop->lab; 823 goto fun; 824 } 825 goto next; 826 827 case 'm': 828 c = mp->swap; 829 t = ckmagic(mp, file, b + (b > cur), end, st, num); 830 mp->swap = c; 831 if (t) 832 { 833 if (b > cur && b < end) 834 *b = ' '; 835 b += strlen(b); 836 } 837 else if (ep->cont == '&') 838 goto next; 839 break; 840 841 case 'r': 842#if _UWIN 843 { 844 char* e; 845 Sfio_t* rp; 846 Sfio_t* gp; 847 848 if (!(t = strrchr(file, '.'))) 849 goto next; 850 sfprintf(mp->tmp, "/reg/classes_root/%s", t); 851 if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r"))) 852 goto next; 853 *ep->desc = 0; 854 *ep->mime = 0; 855 gp = 0; 856 while (t = sfgetr(rp, '\n', 1)) 857 { 858 if (strneq(t, "Content Type=", 13)) 859 { 860 ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0); 861 strcpy(ep->mime, t + 13); 862 if (gp) 863 break; 864 } 865 else 866 { 867 sfprintf(mp->tmp, "/reg/classes_root/%s", t); 868 if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r"))) 869 { 870 ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1); 871 strcpy(ep->desc, t); 872 if (*ep->mime) 873 break; 874 } 875 } 876 } 877 sfclose(rp); 878 if (!gp) 879 goto next; 880 if (!*ep->mime) 881 { 882 t = T(ep->desc); 883 if (!strncasecmp(t, "microsoft", 9)) 884 t += 9; 885 while (isspace(*t)) 886 t++; 887 e = "application/x-ms-"; 888 ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e)); 889 e = strcopy(ep->mime, e); 890 while ((c = *t++) && c != '.' && c != ' ') 891 *e++ = isupper(c) ? 
tolower(c) : c; 892 *e = 0; 893 } 894 while (t = sfgetr(gp, '\n', 1)) 895 if (*t && !streq(t, "\"\"")) 896 { 897 ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0); 898 strcpy(ep->desc, t); 899 break; 900 } 901 sfclose(gp); 902 if (!*ep->desc) 903 goto next; 904 if (!t) 905 for (t = T(ep->desc); *t; t++) 906 if (*t == '.') 907 *t = ' '; 908 if (!mp->keep[level]) 909 mp->keep[level] = 2; 910 mp->mime = ep->mime; 911 break; 912 } 913#else 914 if (ep->cont == '#' && !mp->keep[level]) 915 mp->keep[level] = 1; 916 goto next; 917#endif 918 919 case 'v': 920 if (!(p = getdata(mp, num, 4))) 921 goto next; 922 c = 0; 923 do 924 { 925 num++; 926 c = (c<<7) | (*p & 0x7f); 927 } while (*p++ & 0x80); 928 if (!(p = getdata(mp, num, c))) 929 goto next; 930 if (mp->keep[level]++ && b > cur && b < (end - 1) && *(b - 1) != ' ') 931 { 932 *b++ = ','; 933 *b++ = ' '; 934 } 935 b = vcdecomp(b, cur + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c); 936 goto checknest; 937 938 } 939 swapped: 940 q = T(ep->desc); 941 if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b') 942 *b++ = ' '; 943 if (*q == '\b') 944 q++; 945 str = 0; 946 for (t = q; *t; t++) 947 if (*t == '%' && (c = *(t + 1))) 948 { 949 if (c == '%') 950 t++; 951 else 952 while (c && c != '%') 953 { 954 if (c == 's') 955 { 956 str = 1; 957 break; 958 } 959 else if (c == 'c' || c == 'd' || c == 'i' || c == 'u' || c == 'x' || c == 'X') 960 goto format; 961 t++; 962 c = *(t + 1); 963 } 964 } 965 format: 966 if (!str) 967 b += sfsprintf(b, end - b, q, num, num == 1 ? "" : "s", 0, 0, 0, 0, 0, 0); 968 else if (ep->type == 'd' || ep->type == 'D') 969 b += sfsprintf(b, end - b, q, fmttime("%?%QL", (time_t)num), 0, 0, 0, 0, 0, 0, 0); 970 else if (ep->type == 'v') 971 b += sfsprintf(b, end - b, q, fmtversion(num), 0, 0, 0, 0, 0, 0, 0); 972 else 973 b += sfsprintf(b, end - b, q, fmtnum(num, 0), num == 1 ? 
"" : "s", 0, 0, 0, 0, 0, 0); 974 if (ep->mime && *ep->mime) 975 mp->mime = ep->mime; 976 checknest: 977 if (ep->nest == '}') 978 { 979 if (!mp->keep[level]) 980 { 981 b = mp->msg[level]; 982 mp->mime = mp->cap[level]; 983 } 984 else if (level > 0) 985 mp->keep[level - 1] = mp->keep[level]; 986 if (--level < 0) 987 { 988 level = 0; 989 mp->keep[0] = 0; 990 } 991 } 992 continue; 993 next: 994 if (ep->cont == '&') 995 mp->keep[level] = 0; 996 goto checknest; 997 } 998 if (all && b-- || mp->keep[level] && b > cur) 999 { 1000 *b = 0; 1001 return buf; 1002 } 1003 return 0; 1004} 1005 1006/* 1007 * check english language stats 1008 */ 1009 1010static int 1011ckenglish(register Magic_t* mp, int pun, int badpun) 1012{ 1013 register char* s; 1014 register int vowl = 0; 1015 register int freq = 0; 1016 register int rare = 0; 1017 1018 if (5 * badpun > pun) 1019 return 0; 1020 if (2 * mp->count[';'] > mp->count['E'] + mp->count['e']) 1021 return 0; 1022 if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e']) 1023 return 0; 1024 for (s = "aeiou"; *s; s++) 1025 vowl += mp->count[toupper(*s)] + mp->count[*s]; 1026 for (s = "etaion"; *s; s++) 1027 freq += mp->count[toupper(*s)] + mp->count[*s]; 1028 for (s = "vjkqxz"; *s; s++) 1029 rare += mp->count[toupper(*s)] + mp->count[*s]; 1030 return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare; 1031} 1032 1033/* 1034 * check programming language stats 1035 */ 1036 1037static char* 1038cklang(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st) 1039{ 1040 register int c; 1041 register unsigned char* b; 1042 register unsigned char* e; 1043 register int q; 1044 register char* s; 1045 char* t; 1046 char* base; 1047 char* suff; 1048 char* t1; 1049 char* t2; 1050 char* t3; 1051 int n; 1052 int badpun; 1053 int code; 1054 int pun; 1055 Cctype_t flags; 1056 Info_t* ip; 1057 1058 b = (unsigned char*)mp->fbuf; 1059 e = b + mp->fbsz; 1060 memzero(mp->count, sizeof(mp->count)); 
1061 memzero(mp->multi, sizeof(mp->multi)); 1062 memzero(mp->identifier, sizeof(mp->identifier)); 1063 1064 /* 1065 * check character coding 1066 */ 1067 1068 flags = 0; 1069 while (b < e) 1070 flags |= mp->cctype[*b++]; 1071 b = (unsigned char*)mp->fbuf; 1072 code = 0; 1073 q = CC_ASCII; 1074 n = CC_MASK; 1075 for (c = 0; c < CC_MAPS; c++) 1076 { 1077 flags ^= CC_text; 1078 if ((flags & CC_MASK) < n) 1079 { 1080 n = flags & CC_MASK; 1081 q = c; 1082 } 1083 flags >>= CC_BIT; 1084 } 1085 flags = n; 1086 if (!(flags & (CC_binary|CC_notext))) 1087 { 1088 if (q != CC_NATIVE) 1089 { 1090 code = q; 1091 ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE); 1092 } 1093 if (b[0] == '#' && b[1] == '!') 1094 { 1095 for (b += 2; b < e && isspace(*b); b++); 1096 for (s = (char*)b; b < e && isprint(*b); b++); 1097 c = *b; 1098 *b = 0; 1099 if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK)) 1100 { 1101 if (t = strrchr(s, '/')) 1102 s = t + 1; 1103 for (t = s; *t; t++) 1104 if (isspace(*t)) 1105 { 1106 *t = 0; 1107 break; 1108 } 1109 sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? 
s : "sh"); 1110 mp->mime = mp->mbuf; 1111 if (match(s, "*sh")) 1112 { 1113 t1 = T("command"); 1114 if (streq(s, "sh")) 1115 *s = 0; 1116 else 1117 { 1118 *b++ = ' '; 1119 *b = 0; 1120 } 1121 } 1122 else 1123 { 1124 t1 = T("interpreter"); 1125 *b++ = ' '; 1126 *b = 0; 1127 } 1128 sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1); 1129 s = mp->sbuf; 1130 goto qualify; 1131 } 1132 *b = c; 1133 b = (unsigned char*)mp->fbuf; 1134 } 1135 badpun = 0; 1136 pun = 0; 1137 q = 0; 1138 s = 0; 1139 t = 0; 1140 while (b < e) 1141 { 1142 c = *b++; 1143 mp->count[c]++; 1144 if (c == q && (q != '*' || *b == '/' && b++)) 1145 { 1146 mp->multi[q]++; 1147 q = 0; 1148 } 1149 else if (c == '\\') 1150 { 1151 s = 0; 1152 b++; 1153 } 1154 else if (!q) 1155 { 1156 if (isalpha(c) || c == '_') 1157 { 1158 if (!s) 1159 s = (char*)b - 1; 1160 } 1161 else if (!isdigit(c)) 1162 { 1163 if (s) 1164 { 1165 if (s > mp->fbuf) 1166 switch (*(s - 1)) 1167 { 1168 case ':': 1169 if (*b == ':') 1170 mp->multi[':']++; 1171 break; 1172 case '.': 1173 if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n')) 1174 mp->multi['.']++; 1175 break; 1176 case '\n': 1177 case '\\': 1178 if (*b == '{') 1179 t = (char*)b + 1; 1180 break; 1181 case '{': 1182 if (s == t && *b == '}') 1183 mp->multi['X']++; 1184 break; 1185 } 1186 if (!mp->idtab) 1187 { 1188 if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dtset)) 1189 for (q = 0; q < elementsof(dict); q++) 1190 dtinsert(mp->idtab, &dict[q]); 1191 else if (mp->disc->errorf) 1192 (*mp->disc->errorf)(mp, mp->disc, 3, "out of space"); 1193 q = 0; 1194 } 1195 if (mp->idtab) 1196 { 1197 *(b - 1) = 0; 1198 if (ip = (Info_t*)dtmatch(mp->idtab, s)) 1199 mp->identifier[ip->value]++; 1200 *(b - 1) = c; 1201 } 1202 s = 0; 1203 } 1204 switch (c) 1205 { 1206 case '\t': 1207 if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n') 1208 mp->multi['\t']++; 1209 break; 1210 case '"': 1211 case '\'': 1212 q = c; 1213 break; 1214 case '/': 1215 if (*b == '*') 1216 q = 
*b++; 1217 else if (*b == '/') 1218 q = '\n'; 1219 break; 1220 case '$': 1221 if (*b == '(' && *(b + 1) != ' ') 1222 mp->multi['$']++; 1223 break; 1224 case '{': 1225 case '}': 1226 case '[': 1227 case ']': 1228 case '(': 1229 mp->multi[c]++; 1230 break; 1231 case ')': 1232 mp->multi[c]++; 1233 goto punctuation; 1234 case ':': 1235 if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2))) 1236 mp->multi[':']++; 1237 goto punctuation; 1238 case '.': 1239 case ',': 1240 case '%': 1241 case ';': 1242 case '?': 1243 punctuation: 1244 pun++; 1245 if (*b != ' ' && *b != '\n') 1246 badpun++; 1247 break; 1248 } 1249 } 1250 } 1251 } 1252 } 1253 else 1254 while (b < e) 1255 mp->count[*b++]++; 1256 base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file; 1257 suff = (t1 = strrchr(base, '.')) ? t1 + 1 : ""; 1258 if (!flags) 1259 { 1260 if (match(suff, "*sh|bat|cmd")) 1261 goto id_sh; 1262 if (match(base, "*@(mkfile)")) 1263 goto id_mk; 1264 if (match(base, "*@(makefile|.mk)")) 1265 goto id_make; 1266 if (match(base, "*@(mamfile|.mam)")) 1267 goto id_mam; 1268 if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy")) 1269 goto id_c; 1270 if (match(suff, "f")) 1271 goto id_fortran; 1272 if (match(suff, "htm+(l)")) 1273 goto id_html; 1274 if (match(suff, "cpy")) 1275 goto id_copybook; 1276 if (match(suff, "cob|cbl|cb2")) 1277 goto id_cobol; 1278 if (match(suff, "pl[1i]")) 1279 goto id_pl1; 1280 if (match(suff, "tex")) 1281 goto id_tex; 1282 if (match(suff, "asm|s")) 1283 goto id_asm; 1284 if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.'))) 1285 { 1286 id_sh: 1287 s = T("command script"); 1288 mp->mime = "application/sh"; 1289 goto qualify; 1290 } 1291 if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *")) 1292 { 1293 s = T("mail message"); 1294 mp->mime = "message/rfc822"; 1295 goto qualify; 1296 } 1297 if (match(base, "*@(mkfile)")) 1298 { 1299 id_mk: 1300 s = "mkfile"; 1301 mp->mime = "application/mk"; 
1302 goto qualify; 1303 } 1304 if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0)) 1305 { 1306 id_make: 1307 s = "makefile"; 1308 mp->mime = "application/make"; 1309 goto qualify; 1310 } 1311 if (mp->multi['.'] >= 3) 1312 { 1313 s = T("nroff input"); 1314 mp->mime = "application/x-troff"; 1315 goto qualify; 1316 } 1317 if (mp->multi['X'] >= 3) 1318 { 1319 s = T("TeX input"); 1320 mp->mime = "application/x-tex"; 1321 goto qualify; 1322 } 1323 if (mp->fbsz < SF_BUFSIZE && 1324 (mp->multi['('] == mp->multi[')'] && 1325 mp->multi['{'] == mp->multi['}'] && 1326 mp->multi['['] == mp->multi[']']) || 1327 mp->fbsz >= SF_BUFSIZE && 1328 (mp->multi['('] >= mp->multi[')'] && 1329 mp->multi['{'] >= mp->multi['}'] && 1330 mp->multi['['] >= mp->multi[']'])) 1331 { 1332 c = mp->identifier[ID_INCL1]; 1333 if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c || 1334 mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 || 1335 mp->count['='] >= 20 && mp->count[';'] >= 20) 1336 { 1337 id_c: 1338 t1 = ""; 1339 t2 = "c "; 1340 t3 = T("program"); 1341 switch (*suff) 1342 { 1343 case 'c': 1344 case 'C': 1345 mp->mime = "application/x-cc"; 1346 break; 1347 case 'l': 1348 case 'L': 1349 t1 = "lex "; 1350 mp->mime = "application/x-lex"; 1351 break; 1352 default: 1353 t3 = T("header"); 1354 if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5) 1355 { 1356 mp->mime = "application/x-cc"; 1357 break; 1358 } 1359 /*FALLTHROUGH*/ 1360 case 'y': 1361 case 'Y': 1362 t1 = "yacc "; 1363 mp->mime = "application/x-yacc"; 1364 break; 1365 } 1366 if (mp->identifier[ID_CPLUSPLUS] >= 3) 1367 { 1368 t2 = "c++ "; 1369 mp->mime = "application/x-c++"; 1370 } 1371 sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3); 1372 s = mp->sbuf; 1373 goto qualify; 1374 } 1375 } 1376 if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 && 1377 (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] 
== mp->identifier[ID_MAM2] || 1378 mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2])) 1379 { 1380 id_mam: 1381 s = T("mam program"); 1382 mp->mime = "application/x-mam"; 1383 goto qualify; 1384 } 1385 if (mp->identifier[ID_FORTRAN] >= 8) 1386 { 1387 id_fortran: 1388 s = T("fortran program"); 1389 mp->mime = "application/x-fortran"; 1390 goto qualify; 1391 } 1392 if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2) 1393 { 1394 id_html: 1395 s = T("html input"); 1396 mp->mime = "text/html"; 1397 goto qualify; 1398 } 1399 if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) 1400 { 1401 id_copybook: 1402 s = T("cobol copybook"); 1403 mp->mime = "application/x-cobol"; 1404 goto qualify; 1405 } 1406 if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) 1407 { 1408 id_cobol: 1409 s = T("cobol program"); 1410 mp->mime = "application/x-cobol"; 1411 goto qualify; 1412 } 1413 if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) 1414 { 1415 id_pl1: 1416 s = T("pl1 program"); 1417 mp->mime = "application/x-pl1"; 1418 goto qualify; 1419 } 1420 if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{']) 1421 { 1422 id_tex: 1423 s = T("TeX input"); 1424 mp->mime = "text/tex"; 1425 goto qualify; 1426 } 1427 if (mp->identifier[ID_ASM] >= 4) 1428 { 1429 id_asm: 1430 s = T("as program"); 1431 mp->mime = "application/x-as"; 1432 goto qualify; 1433 } 1434 if (ckenglish(mp, pun, badpun)) 1435 { 1436 s = T("english text"); 1437 mp->mime = "text/plain"; 1438 goto qualify; 1439 } 1440 } 1441 else if (streq(base, "core")) 1442 { 1443 mp->mime = "x-system/core"; 1444 return T("core dump"); 1445 } 1446 if (flags & (CC_binary|CC_notext)) 1447 { 1448 b = (unsigned 
char*)mp->fbuf; 1449 e = b + mp->fbsz; 1450 n = 0; 1451 for (;;) 1452 { 1453 c = *b++; 1454 q = 0; 1455 while (c & 0x80) 1456 { 1457 c <<= 1; 1458 q++; 1459 } 1460 switch (q) 1461 { 1462 case 4: 1463 if (b < e && (*b++ & 0xc0) != 0x80) 1464 break; 1465 case 3: 1466 if (b < e && (*b++ & 0xc0) != 0x80) 1467 break; 1468 case 2: 1469 if (b < e && (*b++ & 0xc0) != 0x80) 1470 break; 1471 n = 1; 1472 case 0: 1473 if (b >= e) 1474 { 1475 if (n) 1476 { 1477 flags &= ~(CC_binary|CC_notext); 1478 flags |= CC_utf_8; 1479 } 1480 break; 1481 } 1482 continue; 1483 } 1484 break; 1485 } 1486 } 1487 if (flags & (CC_binary|CC_notext)) 1488 { 1489 unsigned long d = 0; 1490 1491 if ((q = mp->fbsz / UCHAR_MAX) >= 2) 1492 { 1493 /* 1494 * compression/encryption via standard deviation 1495 */ 1496 1497 1498 for (c = 0; c < UCHAR_MAX; c++) 1499 { 1500 pun = mp->count[c] - q; 1501 d += pun * pun; 1502 } 1503 d /= mp->fbsz; 1504 } 1505 if (d <= 0) 1506 s = T("binary"); 1507 else if (d < 4) 1508 s = T("encrypted"); 1509 else if (d < 16) 1510 s = T("packed"); 1511 else if (d < 64) 1512 s = T("compressed"); 1513 else if (d < 256) 1514 s = T("delta"); 1515 else 1516 s = T("data"); 1517 mp->mime = "application/octet-stream"; 1518 return s; 1519 } 1520 mp->mime = "text/plain"; 1521 if (flags & CC_utf_8) 1522 s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text"); 1523 else if (flags & CC_latin) 1524 s = (flags & CC_control) ? T("latin text with control characters") : T("latin text"); 1525 else 1526 s = (flags & CC_control) ? 
T("text with control characters") : T("text"); 1527 qualify: 1528 if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r']) 1529 { 1530 t = "dos "; 1531 mp->mime = "text/dos"; 1532 } 1533 else 1534 t = ""; 1535 if (code) 1536 { 1537 if (code == CC_ASCII) 1538 sfsprintf(buf, end - buf, "ascii %s%s", t, s); 1539 else 1540 { 1541 sfsprintf(buf, end - buf, "ebcdic%d %s%s", code - 1, t, s); 1542 mp->mime = "text/ebcdic"; 1543 } 1544 s = buf; 1545 } 1546 else if (*t) 1547 { 1548 sfsprintf(buf, end - buf, "%s%s", t, s); 1549 s = buf; 1550 } 1551 return s; 1552} 1553 1554/* 1555 * return the basic magic string for file,st in buf,size 1556 */ 1557 1558static char* 1559type(register Magic_t* mp, const char* file, struct stat* st, char* buf, char* end) 1560{ 1561 register char* s; 1562 register char* t; 1563 1564 mp->mime = 0; 1565 if (!S_ISREG(st->st_mode)) 1566 { 1567 if (S_ISDIR(st->st_mode)) 1568 { 1569 mp->mime = "x-system/dir"; 1570 return T("directory"); 1571 } 1572 if (S_ISLNK(st->st_mode)) 1573 { 1574 mp->mime = "x-system/lnk"; 1575 s = buf; 1576 s += sfsprintf(s, end - s, T("symbolic link to ")); 1577 if (pathgetlink(file, s, end - s) < 0) 1578 return T("cannot read symbolic link text"); 1579 return buf; 1580 } 1581 if (S_ISBLK(st->st_mode)) 1582 { 1583 mp->mime = "x-system/blk"; 1584 sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st)); 1585 return buf; 1586 } 1587 if (S_ISCHR(st->st_mode)) 1588 { 1589 mp->mime = "x-system/chr"; 1590 sfsprintf(buf, end - buf, T("character special (%s)"), fmtdev(st)); 1591 return buf; 1592 } 1593 if (S_ISFIFO(st->st_mode)) 1594 { 1595 mp->mime = "x-system/fifo"; 1596 return "fifo"; 1597 } 1598#ifdef S_ISSOCK 1599 if (S_ISSOCK(st->st_mode)) 1600 { 1601 mp->mime = "x-system/sock"; 1602 return "socket"; 1603 } 1604#endif 1605 } 1606 if (!(mp->fbmx = st->st_size)) 1607 s = T("empty"); 1608 else if (!mp->fp) 1609 s = T("cannot read"); 1610 else 1611 { 1612 mp->fbsz = 
/*
 * low level for magicload()
 *
 * read the magic file lines from fp one at a time and append one
 * Entry_t per accepted line to the table in mp; file is only used to
 * label diagnostics through error_info
 *
 * each data line is split in place into
 *
 *	[cont]offset  type  [op]value  description  [mime]
 *
 * lines that are just { or } adjust the nesting level, and a line
 * starting with a lower case letter followed by { or ( defines or
 * calls a one letter function
 *
 * local state carried from line to line:
 *
 *	ep	entry being filled; always allocated one ahead
 *	last	most recently completed entry
 *	lev	current { } nesting level
 *	ent	nesting level at which the current function body began
 *	ret	function definition entry still waiting for its }
 *	old	old style > >> nesting state
 *	cont	continuation code given to function/call entries
 *	fun	function body entries indexed by letter - 'a'
 *
 * this function always returns 0; problems are reported through
 * mp->disc->errorf when it is set
 */

static int
load(register Magic_t* mp, char* file, register Sfio_t* fp)
{
	register Entry_t*	ep;
	register char*		p;
	register char*		p2;
	char*			p3;
	char*			next;
	int			n;
	int			lge;
	int			lev;
	int			ent;
	int			old;
	int			cont;
	Info_t*			ip;
	Entry_t*		ret;
	Entry_t*		first;
	Entry_t*		last = 0;
	Entry_t*		fun['z' - 'a' + 1];

	memzero(fun, sizeof(fun));
	cont = '$';
	ent = 0;
	lev = 0;
	old = 0;
	ret = 0;
	error_info.file = file;
	error_info.line = 0;
	first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0);
	while (p = sfgetr(fp, '\n', 1))
	{
		error_info.line++;
		for (; isspace(*p); p++);

		/*
		 * nesting
		 */

		switch (*p)
		{
		case 0:
		case '#':
			/* empty line or comment: remembered in cont for a following function/call entry */
			cont = '#';
			continue;
		case '{':
			if (++lev < MAXNEST)
				ep->nest = *p;
			else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
				(*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
			continue;
		case '}':
			if (!last || lev <= 0)
			{
				if (mp->disc->errorf)
					(*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
			}
			else if (lev-- == ent)
			{
				/*
				 * this } closes a function body: ep becomes the
				 * [RETURN] entry and the definition entry's next
				 * link is pointed past the body, which stays
				 * reachable through the definition's value.lab
				 */

				ent = 0;
				ep->cont = ':';
				ep->offset = ret->offset;
				ep->nest = ' ';
				ep->type = ' ';
				ep->op = ' ';
				ep->desc = "[RETURN]";
				last = ep;
				ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
				ret = 0;
			}
			else
				last->nest = *p;
			continue;
		default:
			if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|')
			{
				/*
				 * x{ starts the definition of function x
				 * x( is a call of function x
				 */

				n = *p++;
				if (n >= 'a' && n <= 'z')
					n -= 'a';
				else
				{
					if (mp->disc->errorf)
						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
					n = 0;
				}
				if (ret && mp->disc->errorf)
					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
				if (*p == '{')
				{
					ent = ++lev;
					ret = ep;
					ep->desc = "[FUNCTION]";
				}
				else
				{
					if (*(p + 1) != ')' && mp->disc->errorf)
						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
					ep->desc = "[CALL]";
				}
				ep->cont = cont;
				ep->offset = n;
				ep->nest = ' ';
				ep->type = ' ';
				ep->op = ' ';
				last = ep;
				ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);

				/*
				 * definition: the next entry is the body start
				 * call: link to the body recorded earlier, if any
				 */

				if (ret)
					fun[n] = last->value.lab = ep;
				else if (!(last->value.lab = fun[n]) && mp->disc->errorf)
					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
				continue;
			}
			if (!ep->nest)
				ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' ';
			break;
		}

		/*
		 * continuation
		 */

		cont = '$';
		switch (*p)
		{
		case '>':
			old = 1;
			if (*(p + 1) == *p)
			{
				/*
				 * old style nesting push
				 */

				p++;
				old = 2;
				if (!lev && last)
				{
					lev = 1;
					last->nest = '{';
					if (last->cont == '>')
						last->cont = '&';
					ep->nest = '1';
				}
			}
			/*FALLTHROUGH*/
		case '+':
		case '&':
		case '|':
			ep->cont = *p++;
			break;
		default:
			if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
				(*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
			/*FALLTHROUGH*/
		case '*':
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/* no explicit operator: & inside { }, otherwise # */
			ep->cont = (lev > 0) ? '&' : '#';
			break;
		}

		/*
		 * old is 2 on a >> line, 1 on the line after it (or on a
		 * plain > line) and 0 otherwise; a pending old style level
		 * is popped by the first line seen with old == 1
		 */

		switch (old)
		{
		case 1:
			old = 0;
			if (lev)
			{
				/*
				 * old style nesting pop
				 */

				lev = 0;
				if (last)
					last->nest = '}';
				ep->nest = ' ';
				if (ep->cont == '&')
					ep->cont = '#';
			}
			break;
		case 2:
			old = 1;
			break;
		}
		if (isdigit(*p))
		{
			/*
			 * absolute offset
			 */

			ep->offset = strton(p, &next, NiL, 0);
			p2 = next;
		}
		else
		{
			for (p2 = p; *p2 && !isspace(*p2); p2++);
			if (!*p2)
			{
				/* NOTE(review): ep keeps the fields set so far and is reused for the next line */
				if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
					(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
				continue;
			}

			/*
			 * offset expression
			 */

			*p2++ = 0;
			ep->expr = vmstrdup(mp->vm, p);
			if (isalpha(*p))
				ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0;
			else if (*p == '(' && ep->cont == '>')
			{
				/*
				 * convert old style indirection to @
				 */

				/* scan the saved copy up to the first of NUL @ ( ) */
				p = ep->expr + 1;
				for (;;)
				{
					switch (*p++)
					{
					case 0:
					case '@':
					case '(':
						break;
					case ')':
						break;
					default:
						continue;
					}
					break;
				}

				/* only a plain (...) is rewritten: drop the ) and turn the ( into @ */
				if (*--p == ')')
				{
					*p = 0;
					*ep->expr = '@';
				}
			}
		}

		/* isolate the type field in p */
		for (; isspace(*p2); p2++);
		for (p = p2; *p2 && !isspace(*p2); p2++);
		if (!*p2)
		{
			if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
				(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
			continue;
		}
		*p2++ = 0;

		/*
		 * type
		 */

		if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
		{
			/* be/le prefix: swap is ~0 for be and ~7 for le */
			ep->swap = ~(*p == 'l' ? 7 : 0);
			p += 2;
		}
		if (*p == 's')
		{
			/* s followed by h is recorded as type h, any other s* as type s */
			if (*(p + 1) == 'h')
				ep->type = 'h';
			else
				ep->type = 's';
		}
		else if (*p == 'a')
			ep->type = 's';
		else
			ep->type = *p;
		if (p = strchr(p, '&'))
		{
			/*
			 * old style mask
			 */

			ep->mask = strton(++p, NiL, NiL, 0);
		}
		for (; isspace(*p2); p2++);

		/* with a mask already set an = is stored just before the value field */
		if (ep->mask)
			*--p2 = '=';

		/*
		 * comparison operation
		 */

		p = p2;
		if (p2 = strchr(p, '\t'))
			*p2++ = 0;
		else
		{
			int	qe = 0;
			int	qn = 0;

			/*
			 * assume balanced {}[]()\\""'' field
			 */

			/*
			 * qe is the closing delimiter being waited for and
			 * qn the open count for bracket delimiters; the
			 * field ends at NUL or at white space seen while
			 * qe is 0
			 */

			for (p2 = p;;)
			{
				switch (n = *p2++)
				{
				case 0:
					break;
				case '{':
					if (!qe)
						qe = '}';
					if (qe == '}')
						qn++;
					continue;
				case '(':
					if (!qe)
						qe = ')';
					if (qe == ')')
						qn++;
					continue;
				case '[':
					if (!qe)
						qe = ']';
					if (qe == ']')
						qn++;
					continue;
				case '}':
				case ')':
				case ']':
					if (qe == n && qn > 0)
						qn--;
					continue;
				case '"':
				case '\'':
					if (!qe)
						qe = n;
					else if (qe == n)
						qe = 0;
					continue;
				case '\\':
					if (*p2)
						p2++;
					continue;
				default:
					if (!qe && isspace(n))
						break;
					continue;
				}

				/* terminate the field; at end of line leave p2 on the NUL */
				if (n)
					*(p2 - 1) = 0;
				else
					p2--;
				break;
			}
		}

		/* lge adjusts the value so that >= and <= can be stored as > and < */
		lge = 0;
		if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
			ep->op = '=';
		else
		{
			if (*p == '&')
			{
				ep->mask = strton(++p, &next, NiL, 0);
				p = next;
			}
			switch (*p)
			{
			case '=':
			case '>':
			case '<':
			case '*':
				ep->op = *p++;
				if (*p == '=')
				{
					p++;
					switch (ep->op)
					{
					case '>':
						lge = -1;
						break;
					case '<':
						lge = 1;
						break;
					}
				}
				break;
			case '!':
			case '@':
				ep->op = *p++;
				if (*p == '=')
					p++;
				break;
			case 'x':
				p++;
				ep->op = '*';
				break;
			default:
				ep->op = '=';
				if (ep->mask)
					ep->value.num = ep->mask;
				break;
			}
		}

		/*
		 * comparison value; skipped for op * and when a value
		 * is already present
		 */

		if (ep->op != '*' && !ep->value.num)
		{
			if (ep->type == 'e')
			{
				/* type e: delimited pattern followed by a substitution */
				if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
				{
					ep->value.sub->re_disc = &mp->redisc;
					if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
					{
						p += ep->value.sub->re_npat;
						if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
							p += ep->value.sub->re_npat;
					}
					if (n)
					{
						regmessage(mp, ep->value.sub, n);
						ep->value.sub = 0;
					}
					else if (*p && mp->disc->errorf)
						(*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
				}
			}
			else if (ep->type == 'm')
			{
				/*
				 * type m: mask holds the copied length, one more
				 * than the stresc() result; unless the value
				 * matches !(*) the extra byte becomes a * for
				 * entries with no expression or a zero offset
				 */

				ep->mask = stresc(p) + 1;
				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
				memcpy(ep->value.str, p, ep->mask);
				if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
					ep->value.str[ep->mask - 1] = '*';
			}
			else if (ep->type == 's')
			{
				/* type s: mask holds the stresc() result, used as the byte count copied */
				ep->mask = stresc(p);
				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
				memcpy(ep->value.str, p, ep->mask);
			}
			else if (*p == '\'')
			{
				/* character constant: the byte after the opening quote */
				stresc(p);
				ep->value.num = *(unsigned char*)(p + 1) + lge;
			}
			else if (strmatch(p, "+([a-z])\\(*\\)"))
			{
				char*	t;

				/*
				 * name(...) value: type V with op set to the
				 * first letter of the name
				 */

				t = p;
				ep->type = 'V';
				ep->op = *p;
				while (*p && *p++ != '(');
				switch (ep->op)
				{
				case 'l':
					/*
					 * first argument is a function letter;
					 * start and size are taken from the
					 * following comma separated arguments
					 */

					n = *p++;
					if (n < 'a' || n > 'z')
					{
						if (mp->disc->errorf)
							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
					}
					else if (!fun[n -= 'a'])
					{
						if (mp->disc->errorf)
							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
					}
					else
					{
						ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0);
						ep->value.loop->lab = fun[n];
						while (*p && *p++ != ',');
						ep->value.loop->start = strton(p, &t, NiL, 0);
						while (*t && *t++ != ',');
						ep->value.loop->size = strton(t, &t, NiL, 0);
					}
					break;
				case 'm':
				case 'r':
					/* 32 byte desc and mime buffers; nothing in this function fills them */
					ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
					ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
					break;
				case 'v':
					break;
				default:
					if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
						(*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
					break;
				}
			}
			else
			{
				/* numeric value; op @ passes it through swapget() */
				ep->value.num = strton(p, NiL, NiL, 0) + lge;
				if (ep->op == '@')
					ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
			}
		}

		/*
		 * file description
		 */

		if (p2)
		{
			for (; isspace(*p2); p2++);
			if (p = strchr(p2, '\t'))
			{
				/*
				 * check for message catalog index
				 */

				/*
				 * a field of the form name:digits is skipped
				 * and the description is taken from the next
				 * tab separated field
				 */

				*p++ = 0;
				if (isalpha(*p2))
				{
					for (p3 = p2; isalnum(*p3); p3++);
					if (*p3++ == ':')
					{
						for (; isdigit(*p3); p3++);
						if (!*p3)
						{
							for (p2 = p; isspace(*p2); p2++);
							if (p = strchr(p2, '\t'))
								*p++ = 0;
						}
					}
				}
			}
			stresc(p2);
			ep->desc = vmstrdup(mp->vm, p2);

			/* whatever follows the description's tab is the mime type */
			if (p)
			{
				for (; isspace(*p); p++);
				if (*p)
					ep->mime = vmstrdup(mp->vm, p);
			}
		}
		else
			ep->desc = "";

		/*
		 * get next entry
		 */

		last = ep;
		ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
	}

	/*
	 * splice the new entries onto the end of the table in mp and
	 * release the entry that was allocated ahead but never filled
	 */

	if (last)
	{
		last->next = 0;
		if (mp->magiclast)
			mp->magiclast->next = first;
		else
			mp->magic = first;
		mp->magiclast = last;
	}
	vmfree(mp->vm, ep);
	if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
	{
		if (lev < 0)
			(*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
		else if (lev > 0)
			(*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
		if (ret)
			(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
	}
	error_info.file = 0;
	error_info.line = 0;
	return 0;
}
'\t')) 2177 *p++ = 0; 2178 } 2179 } 2180 } 2181 } 2182 stresc(p2); 2183 ep->desc = vmstrdup(mp->vm, p2); 2184 if (p) 2185 { 2186 for (; isspace(*p); p++); 2187 if (*p) 2188 ep->mime = vmstrdup(mp->vm, p); 2189 } 2190 } 2191 else 2192 ep->desc = ""; 2193 2194 /* 2195 * get next entry 2196 */ 2197 2198 last = ep; 2199 ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); 2200 } 2201 if (last) 2202 { 2203 last->next = 0; 2204 if (mp->magiclast) 2205 mp->magiclast->next = first; 2206 else 2207 mp->magic = first; 2208 mp->magiclast = last; 2209 } 2210 vmfree(mp->vm, ep); 2211 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 2212 { 2213 if (lev < 0) 2214 (*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators"); 2215 else if (lev > 0) 2216 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators"); 2217 if (ret) 2218 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a'); 2219 } 2220 error_info.file = 0; 2221 error_info.line = 0; 2222 return 0; 2223} 2224 2225/* 2226 * load a magic file into mp 2227 */ 2228 2229int 2230magicload(register Magic_t* mp, const char* file, unsigned long flags) 2231{ 2232 register char* s; 2233 register char* e; 2234 register char* t; 2235 int n; 2236 int found; 2237 int list; 2238 Sfio_t* fp; 2239 2240 mp->flags = mp->disc->flags | flags; 2241 found = 0; 2242 if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1)) 2243 { 2244 if (!(s = getenv(MAGIC_FILE_ENV)) || !*s) 2245 s = MAGIC_FILE; 2246 } 2247 for (;;) 2248 { 2249 if (!list) 2250 e = 0; 2251 else if (e = strchr(s, ':')) 2252 { 2253 /* 2254 * ok, so ~ won't work for the last list element 2255 * we do it for MAGIC_FILES_ENV anyway 2256 */ 2257 2258 if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME"))) 2259 { 2260 sfputr(mp->tmp, t, -1); 2261 s += n - 1; 2262 } 2263 sfwrite(mp->tmp, s, e - s); 2264 if (!(s = sfstruse(mp->tmp))) 2265 goto nospace; 2266 } 2267 if (!*s 
|| streq(s, "-")) 2268 s = MAGIC_FILE; 2269 if (!(fp = sfopen(NiL, s, "r"))) 2270 { 2271 if (list) 2272 { 2273 if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))) && !strchr(s, '/')) 2274 { 2275 strcpy(mp->fbuf, s); 2276 sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf); 2277 if (!(s = sfstruse(mp->tmp))) 2278 goto nospace; 2279 if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf)))) 2280 goto next; 2281 } 2282 if (!(fp = sfopen(NiL, t, "r"))) 2283 goto next; 2284 } 2285 else 2286 { 2287 if (mp->disc->errorf) 2288 (*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s); 2289 return -1; 2290 } 2291 } 2292 found = 1; 2293 n = load(mp, s, fp); 2294 sfclose(fp); 2295 if (n && !list) 2296 return -1; 2297 next: 2298 if (!e) 2299 break; 2300 s = e + 1; 2301 } 2302 if (!found) 2303 { 2304 if (mp->flags & MAGIC_VERBOSE) 2305 { 2306 if (mp->disc->errorf) 2307 (*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file"); 2308 } 2309 return -1; 2310 } 2311 return 0; 2312 nospace: 2313 if (mp->disc->errorf) 2314 (*mp->disc->errorf)(mp, mp->disc, 3, "out of space"); 2315 return -1; 2316} 2317 2318/* 2319 * open a magic session 2320 */ 2321 2322Magic_t* 2323magicopen(Magicdisc_t* disc) 2324{ 2325 register Magic_t* mp; 2326 register int i; 2327 register int n; 2328 register int f; 2329 register int c; 2330 register Vmalloc_t* vm; 2331 unsigned char* map[CC_MAPS + 1]; 2332 2333 if (!(vm = vmopen(Vmdcheap, Vmbest, 0))) 2334 return 0; 2335 if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0))) 2336 { 2337 vmclose(vm); 2338 return 0; 2339 } 2340 mp->id = lib; 2341 mp->disc = disc; 2342 mp->vm = vm; 2343 mp->flags = disc->flags; 2344 mp->redisc.re_version = REG_VERSION; 2345 mp->redisc.re_flags = REG_NOFREE; 2346 mp->redisc.re_errorf = (regerror_t)disc->errorf; 2347 mp->redisc.re_resizef = (regresize_t)vmgetmem; 2348 mp->redisc.re_resizehandle = (void*)mp->vm; 2349 mp->dtdisc.key = offsetof(Info_t, name); 2350 mp->dtdisc.link = 
offsetof(Info_t, link); 2351 if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dtoset))) 2352 goto bad; 2353 for (n = 0; n < elementsof(info); n++) 2354 dtinsert(mp->infotab, &info[n]); 2355 for (i = 0; i < CC_MAPS; i++) 2356 map[i] = ccmap(i, CC_ASCII); 2357 mp->x2n = ccmap(CC_ALIEN, CC_NATIVE); 2358 for (n = 0; n <= UCHAR_MAX; n++) 2359 { 2360 f = 0; 2361 i = CC_MAPS; 2362 while (--i >= 0) 2363 { 2364 c = ccmapchr(map[i], n); 2365 f = (f << CC_BIT) | CCTYPE(c); 2366 } 2367 mp->cctype[n] = f; 2368 } 2369 return mp; 2370 bad: 2371 magicclose(mp); 2372 return 0; 2373} 2374 2375/* 2376 * close a magicopen() session 2377 */ 2378 2379int 2380magicclose(register Magic_t* mp) 2381{ 2382 if (!mp) 2383 return -1; 2384 if (mp->tmp) 2385 sfstrclose(mp->tmp); 2386 if (mp->vm) 2387 vmclose(mp->vm); 2388 return 0; 2389} 2390 2391/* 2392 * return the magic string for file with optional stat info st 2393 */ 2394 2395char* 2396magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st) 2397{ 2398 off_t off; 2399 char* s; 2400 2401 mp->flags = mp->disc->flags; 2402 mp->mime = 0; 2403 if (!st) 2404 s = T("cannot stat"); 2405 else 2406 { 2407 if (mp->fp = fp) 2408 off = sfseek(mp->fp, (off_t)0, SEEK_CUR); 2409 s = type(mp, file, st, mp->tbuf, &mp->tbuf[sizeof(mp->tbuf)-1]); 2410 if (mp->fp) 2411 sfseek(mp->fp, off, SEEK_SET); 2412 if (!(mp->flags & (MAGIC_MIME|MAGIC_ALL))) 2413 { 2414 if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128)) 2415 sfprintf(mp->tmp, "%s ", T("short")); 2416 sfprintf(mp->tmp, "%s", s); 2417 if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))) 2418 sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? 
T("searchable") : T("executable")); 2419 if (st->st_mode & S_ISUID) 2420 sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid)); 2421 if (st->st_mode & S_ISGID) 2422 sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid)); 2423 if (st->st_mode & S_ISVTX) 2424 sfprintf(mp->tmp, ", sticky"); 2425 if (!(s = sfstruse(mp->tmp))) 2426 s = T("out of space"); 2427 } 2428 } 2429 if (mp->flags & MAGIC_MIME) 2430 s = mp->mime; 2431 if (!s) 2432 s = T("error"); 2433 return s; 2434} 2435 2436/* 2437 * list the magic table in mp on sp 2438 */ 2439 2440int 2441magiclist(register Magic_t* mp, register Sfio_t* sp) 2442{ 2443 register Entry_t* ep = mp->magic; 2444 register Entry_t* rp = 0; 2445 2446 mp->flags = mp->disc->flags; 2447 sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n"); 2448 while (ep) 2449 { 2450 sfprintf(sp, "%c %c\t", ep->cont, ep->nest); 2451 if (ep->expr) 2452 sfprintf(sp, "%s", ep->expr); 2453 else 2454 sfprintf(sp, "%ld", ep->offset); 2455 sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask); 2456 switch (ep->type) 2457 { 2458 case 'm': 2459 case 's': 2460 sfputr(sp, fmtesc(ep->value.str), -1); 2461 break; 2462 case 'V': 2463 switch (ep->op) 2464 { 2465 case 'l': 2466 sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset); 2467 break; 2468 case 'v': 2469 sfprintf(sp, "vcodex()"); 2470 break; 2471 default: 2472 sfprintf(sp, "%p", ep->value.str); 2473 break; 2474 } 2475 break; 2476 default: 2477 sfprintf(sp, "%lo", ep->value.num); 2478 break; 2479 } 2480 sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc)); 2481 if (ep->cont == '$' && !ep->value.lab->mask) 2482 { 2483 rp = ep; 2484 ep = ep->value.lab; 2485 } 2486 else 2487 { 2488 if (ep->cont == ':') 2489 { 2490 ep = rp; 2491 ep->value.lab->mask = 1; 2492 } 2493 ep = ep->next; 2494 } 2495 } 2496 return 0; 2497} 2498