1133359Sobrien/* 2133359Sobrien * Copyright (c) Christos Zoulas 2003. 3133359Sobrien * All Rights Reserved. 4191771Sobrien * 5133359Sobrien * Redistribution and use in source and binary forms, with or without 6133359Sobrien * modification, are permitted provided that the following conditions 7133359Sobrien * are met: 8133359Sobrien * 1. Redistributions of source code must retain the above copyright 9133359Sobrien * notice immediately at the beginning of the file, without modification, 10133359Sobrien * this list of conditions, and the following disclaimer. 11133359Sobrien * 2. Redistributions in binary form must reproduce the above copyright 12133359Sobrien * notice, this list of conditions and the following disclaimer in the 13133359Sobrien * documentation and/or other materials provided with the distribution. 14191771Sobrien * 15133359Sobrien * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16133359Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17133359Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18133359Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 19133359Sobrien * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20133359Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21133359Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22133359Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23133359Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24133359Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25133359Sobrien * SUCH DAMAGE. 26133359Sobrien */ 27133359Sobrien#include "file.h" 28191771Sobrien 29191771Sobrien#ifndef lint 30234449SobrienFILE_RCSID("@(#)$File: funcs.c,v 1.60 2011/12/08 12:38:24 rrt Exp $") 31191771Sobrien#endif /* lint */ 32191771Sobrien 33133359Sobrien#include "magic.h" 34133359Sobrien#include <stdarg.h> 35133359Sobrien#include <stdlib.h> 36133359Sobrien#include <string.h> 37133359Sobrien#include <ctype.h> 38159764Sobrien#if defined(HAVE_WCHAR_H) 39159764Sobrien#include <wchar.h> 40159764Sobrien#endif 41169942Sobrien#if defined(HAVE_WCTYPE_H) 42169942Sobrien#include <wctype.h> 43169942Sobrien#endif 44169962Sobrien#if defined(HAVE_LIMITS_H) 45169962Sobrien#include <limits.h> 46169962Sobrien#endif 47133359Sobrien 48186691Sobrien#ifndef SIZE_MAX 49186691Sobrien#define SIZE_MAX ((size_t)~0) 50159764Sobrien#endif 51159764Sobrien 52133359Sobrien/* 53186691Sobrien * Like printf, only we append to a buffer. 54133359Sobrien */ 55133359Sobrienprotected int 56186691Sobrienfile_vprintf(struct magic_set *ms, const char *fmt, va_list ap) 57133359Sobrien{ 58186691Sobrien int len; 59186691Sobrien char *buf, *newstr; 60133359Sobrien 61186691Sobrien len = vasprintf(&buf, fmt, ap); 62186691Sobrien if (len < 0) 63175296Sobrien goto out; 64169962Sobrien 65186691Sobrien if (ms->o.buf != NULL) { 66186691Sobrien len = asprintf(&newstr, "%s%s", ms->o.buf, buf); 67186691Sobrien free(buf); 68186691Sobrien if (len < 0) 69175296Sobrien goto out; 70186691Sobrien free(ms->o.buf); 71186691Sobrien buf = newstr; 72133359Sobrien } 73186691Sobrien ms->o.buf = buf; 74133359Sobrien return 0; 75175296Sobrienout: 76186691Sobrien file_error(ms, errno, "vasprintf failed"); 77175296Sobrien return -1; 78133359Sobrien} 79133359Sobrien 80186691Sobrienprotected int 81186691Sobrienfile_printf(struct magic_set *ms, const char *fmt, ...) 82186691Sobrien{ 83186691Sobrien int rv; 84186691Sobrien va_list ap; 85186691Sobrien 86186691Sobrien va_start(ap, fmt); 87186691Sobrien rv = file_vprintf(ms, fmt, ap); 88186691Sobrien va_end(ap); 89186691Sobrien return rv; 90186691Sobrien} 91186691Sobrien 92133359Sobrien/* 93133359Sobrien * error - print best error message possible 94133359Sobrien */ 95133359Sobrien/*VARARGS*/ 96169962Sobrienprivate void 97169962Sobrienfile_error_core(struct magic_set *ms, int error, const char *f, va_list va, 98234449Sobrien size_t lineno) 99133359Sobrien{ 100133359Sobrien /* Only the first error is ok */ 101191771Sobrien if (ms->event_flags & EVENT_HAD_ERR) 102133359Sobrien return; 103169962Sobrien if (lineno != 0) { 104186691Sobrien free(ms->o.buf); 105186691Sobrien ms->o.buf = NULL; 106234449Sobrien file_printf(ms, "line %" SIZE_T_FORMAT "u: ", lineno); 107169962Sobrien } 108191771Sobrien file_vprintf(ms, f, va); 109186691Sobrien if (error > 0) 110186691Sobrien file_printf(ms, " (%s)", strerror(error)); 111191771Sobrien ms->event_flags |= EVENT_HAD_ERR; 112133359Sobrien ms->error = error; 113133359Sobrien} 114133359Sobrien 115169962Sobrien/*VARARGS*/ 116169962Sobrienprotected void 117169962Sobrienfile_error(struct magic_set *ms, int error, const char *f, ...) 118169962Sobrien{ 119169962Sobrien va_list va; 120169962Sobrien va_start(va, f); 121169962Sobrien file_error_core(ms, error, f, va, 0); 122169962Sobrien va_end(va); 123169962Sobrien} 124133359Sobrien 125169962Sobrien/* 126169962Sobrien * Print an error with magic line number. 127169962Sobrien */ 128169962Sobrien/*VARARGS*/ 129133359Sobrienprotected void 130169962Sobrienfile_magerror(struct magic_set *ms, const char *f, ...) 131169962Sobrien{ 132169962Sobrien va_list va; 133169962Sobrien va_start(va, f); 134169962Sobrien file_error_core(ms, 0, f, va, ms->line); 135169962Sobrien va_end(va); 136169962Sobrien} 137169962Sobrien 138169962Sobrienprotected void 139169942Sobrienfile_oomem(struct magic_set *ms, size_t len) 140133359Sobrien{ 141234449Sobrien file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes", 142234449Sobrien len); 143133359Sobrien} 144133359Sobrien 145133359Sobrienprotected void 146133359Sobrienfile_badseek(struct magic_set *ms) 147133359Sobrien{ 148133359Sobrien file_error(ms, errno, "error seeking"); 149133359Sobrien} 150133359Sobrien 151133359Sobrienprotected void 152133359Sobrienfile_badread(struct magic_set *ms) 153133359Sobrien{ 154133359Sobrien file_error(ms, errno, "error reading"); 155133359Sobrien} 156133359Sobrien 157133359Sobrien#ifndef COMPILE_ONLY 158133359Sobrienprotected int 159234449Sobrienfile_buffer(struct magic_set *ms, int fd, const char *inname __attribute__ ((unused)), 160234449Sobrien const void *buf, size_t nb) 161133359Sobrien{ 162191771Sobrien int m = 0, rv = 0, looks_text = 0; 163175296Sobrien int mime = ms->flags & MAGIC_MIME; 164186691Sobrien const unsigned char *ubuf = CAST(const unsigned char *, buf); 165191771Sobrien unichar *u8buf = NULL; 166191771Sobrien size_t ulen; 167191771Sobrien const char *code = NULL; 168191771Sobrien const char *code_mime = "binary"; 169191771Sobrien const char *type = NULL; 170169962Sobrien 171191771Sobrien 172191771Sobrien 173175296Sobrien if (nb == 0) { 174175296Sobrien if ((!mime || (mime & MAGIC_MIME_TYPE)) && 175175296Sobrien file_printf(ms, mime ? "application/x-empty" : 176175296Sobrien "empty") == -1) 177175296Sobrien return -1; 178175296Sobrien return 1; 179175296Sobrien } else if (nb == 1) { 180175296Sobrien if ((!mime || (mime & MAGIC_MIME_TYPE)) && 181186691Sobrien file_printf(ms, mime ? "application/octet-stream" : 182175296Sobrien "very short file (no magic)") == -1) 183175296Sobrien return -1; 184175296Sobrien return 1; 185175296Sobrien } 186175296Sobrien 187191771Sobrien if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 188191771Sobrien looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen, 189191771Sobrien &code, &code_mime, &type); 190191771Sobrien } 191191771Sobrien 192169962Sobrien#ifdef __EMX__ 193175296Sobrien if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { 194175296Sobrien switch (file_os2_apptype(ms, inname, buf, nb)) { 195175296Sobrien case -1: 196175296Sobrien return -1; 197175296Sobrien case 0: 198175296Sobrien break; 199175296Sobrien default: 200175296Sobrien return 1; 201175296Sobrien } 202169962Sobrien } 203169962Sobrien#endif 204234449Sobrien#if HAVE_FORK 205175296Sobrien /* try compression stuff */ 206191771Sobrien if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) 207191771Sobrien if ((m = file_zmagic(ms, fd, inname, ubuf, nb)) != 0) { 208191771Sobrien if ((ms->flags & MAGIC_DEBUG) != 0) 209191771Sobrien (void)fprintf(stderr, "zmagic %d\n", m); 210191771Sobrien goto done; 211133359Sobrien } 212234449Sobrien#endif 213191771Sobrien /* Check if we have a tar file */ 214191771Sobrien if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) 215191771Sobrien if ((m = file_is_tar(ms, ubuf, nb)) != 0) { 216191771Sobrien if ((ms->flags & MAGIC_DEBUG) != 0) 217191771Sobrien (void)fprintf(stderr, "tar %d\n", m); 218191771Sobrien goto done; 219191771Sobrien } 220191771Sobrien 221191771Sobrien /* Check if we have a CDF file */ 222191771Sobrien if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) 223191771Sobrien if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) { 224191771Sobrien if ((ms->flags & MAGIC_DEBUG) != 0) 225191771Sobrien (void)fprintf(stderr, "cdf %d\n", m); 226191771Sobrien goto done; 227191771Sobrien } 228191771Sobrien 229191771Sobrien /* try soft magic tests */ 230191771Sobrien if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) 231267829Sdelphij if ((m = file_softmagic(ms, ubuf, nb, 0, BINTEST, 232234449Sobrien looks_text)) != 0) { 233191771Sobrien if ((ms->flags & MAGIC_DEBUG) != 0) 234191771Sobrien (void)fprintf(stderr, "softmagic %d\n", m); 235169962Sobrien#ifdef BUILTIN_ELF 236191771Sobrien if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && 237191771Sobrien nb > 5 && fd != -1) { 238191771Sobrien /* 239191771Sobrien * We matched something in the file, so this 240191771Sobrien * *might* be an ELF file, and the file is at 241191771Sobrien * least 5 bytes long, so if it's an ELF file 242191771Sobrien * it has at least one byte past the ELF magic 243191771Sobrien * number - try extracting information from the 244191771Sobrien * ELF headers that cannot easily * be 245191771Sobrien * extracted with rules in the magic file. 246191771Sobrien */ 247191771Sobrien if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0) 248191771Sobrien if ((ms->flags & MAGIC_DEBUG) != 0) 249191771Sobrien (void)fprintf(stderr, 250191771Sobrien "elf %d\n", m); 251191771Sobrien } 252191771Sobrien#endif 253191771Sobrien goto done; 254191771Sobrien } 255191771Sobrien 256234449Sobrien /* try text properties */ 257191771Sobrien if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { 258191771Sobrien 259234449Sobrien if ((m = file_ascmagic(ms, ubuf, nb, looks_text)) != 0) { 260191771Sobrien if ((ms->flags & MAGIC_DEBUG) != 0) 261191771Sobrien (void)fprintf(stderr, "ascmagic %d\n", m); 262191771Sobrien goto done; 263191771Sobrien } 264191771Sobrien 265191771Sobrien /* try to discover text encoding */ 266191771Sobrien if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 267191771Sobrien if (looks_text == 0) 268191771Sobrien if ((m = file_ascmagic_with_encoding( ms, ubuf, 269234449Sobrien nb, u8buf, ulen, code, type, looks_text)) 270234449Sobrien != 0) { 271191771Sobrien if ((ms->flags & MAGIC_DEBUG) != 0) 272191771Sobrien (void)fprintf(stderr, 273191771Sobrien "ascmagic/enc %d\n", m); 274191771Sobrien goto done; 275191771Sobrien } 276191771Sobrien } 277175296Sobrien } 278191771Sobrien 279191771Sobrien /* give up */ 280191771Sobrien m = 1; 281191771Sobrien if ((!mime || (mime & MAGIC_MIME_TYPE)) && 282191771Sobrien file_printf(ms, mime ? "application/octet-stream" : "data") == -1) { 283191771Sobrien rv = -1; 284191771Sobrien } 285191771Sobrien done: 286191771Sobrien if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { 287191771Sobrien if (ms->flags & MAGIC_MIME_TYPE) 288191771Sobrien if (file_printf(ms, "; charset=") == -1) 289191771Sobrien rv = -1; 290191771Sobrien if (file_printf(ms, "%s", code_mime) == -1) 291191771Sobrien rv = -1; 292191771Sobrien } 293234449Sobrien free(u8buf); 294191771Sobrien if (rv) 295191771Sobrien return rv; 296191771Sobrien 297175296Sobrien return m; 298133359Sobrien} 299133359Sobrien#endif 300133359Sobrien 301133359Sobrienprotected int 302133359Sobrienfile_reset(struct magic_set *ms) 303133359Sobrien{ 304133359Sobrien if (ms->mlist == NULL) { 305133359Sobrien file_error(ms, 0, "no magic files loaded"); 306133359Sobrien return -1; 307133359Sobrien } 308192350Sdelphij if (ms->o.buf) { 309192350Sdelphij free(ms->o.buf); 310192350Sdelphij ms->o.buf = NULL; 311192350Sdelphij } 312192350Sdelphij if (ms->o.pbuf) { 313192350Sdelphij free(ms->o.pbuf); 314192350Sdelphij ms->o.pbuf = NULL; 315192350Sdelphij } 316191771Sobrien ms->event_flags &= ~EVENT_HAD_ERR; 317133359Sobrien ms->error = -1; 318133359Sobrien return 0; 319133359Sobrien} 320133359Sobrien 321159764Sobrien#define OCTALIFY(n, o) \ 322169942Sobrien /*LINTED*/ \ 323169942Sobrien (void)(*(n)++ = '\\', \ 324159764Sobrien *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \ 325159764Sobrien *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \ 326159764Sobrien *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \ 327169942Sobrien (o)++) 328159764Sobrien 329133359Sobrienprotected const char * 330133359Sobrienfile_getbuffer(struct magic_set *ms) 331133359Sobrien{ 332169962Sobrien char *pbuf, *op, *np; 333169962Sobrien size_t psize, len; 334133359Sobrien 335191771Sobrien if (ms->event_flags & EVENT_HAD_ERR) 336133359Sobrien return NULL; 337133359Sobrien 338133359Sobrien if (ms->flags & MAGIC_RAW) 339133359Sobrien return ms->o.buf; 340133359Sobrien 341191771Sobrien if (ms->o.buf == NULL) 342191771Sobrien return NULL; 343191771Sobrien 344169962Sobrien /* * 4 is for octal representation, + 1 is for NUL */ 345186691Sobrien len = strlen(ms->o.buf); 346186691Sobrien if (len > (SIZE_MAX - 1) / 4) { 347169962Sobrien file_oomem(ms, len); 348169962Sobrien return NULL; 349169962Sobrien } 350169962Sobrien psize = len * 4 + 1; 351186691Sobrien if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { 352186691Sobrien file_oomem(ms, psize); 353186691Sobrien return NULL; 354133359Sobrien } 355186691Sobrien ms->o.pbuf = pbuf; 356133359Sobrien 357159764Sobrien#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 358159764Sobrien { 359159764Sobrien mbstate_t state; 360159764Sobrien wchar_t nextchar; 361159764Sobrien int mb_conv = 1; 362159764Sobrien size_t bytesconsumed; 363159764Sobrien char *eop; 364159764Sobrien (void)memset(&state, 0, sizeof(mbstate_t)); 365159764Sobrien 366159764Sobrien np = ms->o.pbuf; 367159764Sobrien op = ms->o.buf; 368186691Sobrien eop = op + len; 369159764Sobrien 370159764Sobrien while (op < eop) { 371169942Sobrien bytesconsumed = mbrtowc(&nextchar, op, 372169942Sobrien (size_t)(eop - op), &state); 373159764Sobrien if (bytesconsumed == (size_t)(-1) || 374159764Sobrien bytesconsumed == (size_t)(-2)) { 375159764Sobrien mb_conv = 0; 376159764Sobrien break; 377159764Sobrien } 378159764Sobrien 379169942Sobrien if (iswprint(nextchar)) { 380159764Sobrien (void)memcpy(np, op, bytesconsumed); 381159764Sobrien op += bytesconsumed; 382159764Sobrien np += bytesconsumed; 383159764Sobrien } else { 384159764Sobrien while (bytesconsumed-- > 0) 385159764Sobrien OCTALIFY(np, op); 386159764Sobrien } 387159764Sobrien } 388159764Sobrien *np = '\0'; 389159764Sobrien 390159764Sobrien /* Parsing succeeded as a multi-byte sequence */ 391159764Sobrien if (mb_conv != 0) 392159764Sobrien return ms->o.pbuf; 393159764Sobrien } 394159764Sobrien#endif 395159764Sobrien 396234449Sobrien for (np = ms->o.pbuf, op = ms->o.buf; *op;) { 397133359Sobrien if (isprint((unsigned char)*op)) { 398234449Sobrien *np++ = *op++; 399133359Sobrien } else { 400159764Sobrien OCTALIFY(np, op); 401133359Sobrien } 402133359Sobrien } 403133359Sobrien *np = '\0'; 404133359Sobrien return ms->o.pbuf; 405133359Sobrien} 406159764Sobrien 407169962Sobrienprotected int 408169962Sobrienfile_check_mem(struct magic_set *ms, unsigned int level) 409169962Sobrien{ 410169962Sobrien size_t len; 411169962Sobrien 412169962Sobrien if (level >= ms->c.len) { 413169962Sobrien len = (ms->c.len += 20) * sizeof(*ms->c.li); 414186691Sobrien ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? 415186691Sobrien malloc(len) : 416186691Sobrien realloc(ms->c.li, len)); 417169962Sobrien if (ms->c.li == NULL) { 418169962Sobrien file_oomem(ms, len); 419169962Sobrien return -1; 420169962Sobrien } 421169962Sobrien } 422169962Sobrien ms->c.li[level].got_match = 0; 423169962Sobrien#ifdef ENABLE_CONDITIONALS 424169962Sobrien ms->c.li[level].last_match = 0; 425169962Sobrien ms->c.li[level].last_cond = COND_NONE; 426169962Sobrien#endif /* ENABLE_CONDITIONALS */ 427169962Sobrien return 0; 428169962Sobrien} 429234449Sobrien 430234449Sobrienprotected size_t 431234449Sobrienfile_printedlen(const struct magic_set *ms) 432234449Sobrien{ 433234449Sobrien return ms->o.buf == NULL ? 0 : strlen(ms->o.buf); 434234449Sobrien} 435234449Sobrien 436234449Sobrienprotected int 437234449Sobrienfile_replace(struct magic_set *ms, const char *pat, const char *rep) 438234449Sobrien{ 439234449Sobrien regex_t rx; 440234449Sobrien int rc; 441234449Sobrien 442234449Sobrien rc = regcomp(&rx, pat, REG_EXTENDED); 443234449Sobrien if (rc) { 444234449Sobrien char errmsg[512]; 445234449Sobrien (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); 446234449Sobrien file_magerror(ms, "regex error %d, (%s)", rc, errmsg); 447234449Sobrien return -1; 448234449Sobrien } else { 449234449Sobrien regmatch_t rm; 450234449Sobrien int nm = 0; 451234449Sobrien while (regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { 452234449Sobrien ms->o.buf[rm.rm_so] = '\0'; 453234449Sobrien if (file_printf(ms, "%s%s", rep, 454234449Sobrien rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) 455234449Sobrien return -1; 456234449Sobrien nm++; 457234449Sobrien } 458234449Sobrien regfree(&rx); 459234449Sobrien return nm; 460234449Sobrien } 461234449Sobrien} 462