1/* 2 * Copyright (c) Christos Zoulas 2003. 3 * All Rights Reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice immediately at the beginning of the file, without modification, 10 * this list of conditions, and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27#include "file.h" 28 29#ifndef lint 30FILE_RCSID("@(#)$File: funcs.c,v 1.54 2009/05/08 17:41:59 christos Exp $") 31#endif /* lint */ 32 33#include "magic.h" 34#include <stdarg.h> 35#include <stdlib.h> 36#include <string.h> 37#include <ctype.h> 38#if defined(HAVE_WCHAR_H) 39#include <wchar.h> 40#endif 41#if defined(HAVE_WCTYPE_H) 42#include <wctype.h> 43#endif 44#if defined(HAVE_LIMITS_H) 45#include <limits.h> 46#endif 47 48#ifndef SIZE_MAX 49#define SIZE_MAX ((size_t)~0) 50#endif 51 52/* 53 * Like printf, only we append to a buffer. 54 */ 55protected int 56file_vprintf(struct magic_set *ms, const char *fmt, va_list ap) 57{ 58 int len; 59 char *buf, *newstr; 60 61 len = vasprintf(&buf, fmt, ap); 62 if (len < 0) 63 goto out; 64 65 if (ms->o.buf != NULL) { 66 len = asprintf(&newstr, "%s%s", ms->o.buf, buf); 67 free(buf); 68 if (len < 0) 69 goto out; 70 free(ms->o.buf); 71 buf = newstr; 72 } 73 ms->o.buf = buf; 74 return 0; 75out: 76 file_error(ms, errno, "vasprintf failed"); 77 return -1; 78} 79 80protected int 81file_printf(struct magic_set *ms, const char *fmt, ...) 82{ 83 int rv; 84 va_list ap; 85 86 va_start(ap, fmt); 87 rv = file_vprintf(ms, fmt, ap); 88 va_end(ap); 89 return rv; 90} 91 92/* 93 * error - print best error message possible 94 */ 95/*VARARGS*/ 96private void 97file_error_core(struct magic_set *ms, int error, const char *f, va_list va, 98 size_t lineno) 99{ 100 /* Only the first error is ok */ 101 if (ms->event_flags & EVENT_HAD_ERR) 102 return; 103 if (lineno != 0) { 104 free(ms->o.buf); 105 ms->o.buf = NULL; 106 file_printf(ms, "line %zu: ", lineno); 107 } 108 file_vprintf(ms, f, va); 109 if (error > 0) 110 file_printf(ms, " (%s)", strerror(error)); 111 ms->event_flags |= EVENT_HAD_ERR; 112 ms->error = error; 113} 114 115/*VARARGS*/ 116protected void 117file_error(struct magic_set *ms, int error, const char *f, ...) 118{ 119 va_list va; 120 va_start(va, f); 121 file_error_core(ms, error, f, va, 0); 122 va_end(va); 123} 124 125/* 126 * Print an error with magic line number. 127 */ 128/*VARARGS*/ 129protected void 130file_magerror(struct magic_set *ms, const char *f, ...) 131{ 132 va_list va; 133 va_start(va, f); 134 file_error_core(ms, 0, f, va, ms->line); 135 va_end(va); 136} 137 138protected void 139file_oomem(struct magic_set *ms, size_t len) 140{ 141 file_error(ms, errno, "cannot allocate %zu bytes", len); 142} 143 144protected void 145file_badseek(struct magic_set *ms) 146{ 147 file_error(ms, errno, "error seeking"); 148} 149 150protected void 151file_badread(struct magic_set *ms) 152{ 153 file_error(ms, errno, "error reading"); 154} 155 156#ifndef COMPILE_ONLY 157protected int 158file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf, 159 size_t nb) 160{ 161 int m = 0, rv = 0, looks_text = 0; 162 int mime = ms->flags & MAGIC_MIME; 163 const unsigned char *ubuf = CAST(const unsigned char *, buf); 164 unichar *u8buf = NULL; 165 size_t ulen; 166 const char *code = NULL; 167 const char *code_mime = "binary"; 168 const char *type = NULL; 169 170 171 172 if (nb == 0) { 173 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 174 file_printf(ms, mime ? "application/x-empty" : 175 "empty") == -1) 176 return -1; 177 return 1; 178 } else if (nb == 1) { 179 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 180 file_printf(ms, mime ? "application/octet-stream" : 181 "very short file (no magic)") == -1) 182 return -1; 183 return 1; 184 } 185 186 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 187 looks_text = file_encoding(ms, ubuf, nb, &u8buf, &ulen, 188 &code, &code_mime, &type); 189 } 190 191#ifdef __EMX__ 192 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { 193 switch (file_os2_apptype(ms, inname, buf, nb)) { 194 case -1: 195 return -1; 196 case 0: 197 break; 198 default: 199 return 1; 200 } 201 } 202#endif 203 204 /* try compression stuff */ 205 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) 206 if ((m = file_zmagic(ms, fd, inname, ubuf, nb)) != 0) { 207 if ((ms->flags & MAGIC_DEBUG) != 0) 208 (void)fprintf(stderr, "zmagic %d\n", m); 209 goto done; 210 } 211 212 /* Check if we have a tar file */ 213 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) 214 if ((m = file_is_tar(ms, ubuf, nb)) != 0) { 215 if ((ms->flags & MAGIC_DEBUG) != 0) 216 (void)fprintf(stderr, "tar %d\n", m); 217 goto done; 218 } 219 220 /* Check if we have a CDF file */ 221 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) 222 if ((m = file_trycdf(ms, fd, ubuf, nb)) != 0) { 223 if ((ms->flags & MAGIC_DEBUG) != 0) 224 (void)fprintf(stderr, "cdf %d\n", m); 225 goto done; 226 } 227 228 /* try soft magic tests */ 229 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) 230 if ((m = file_softmagic(ms, ubuf, nb, BINTEST)) != 0) { 231 if ((ms->flags & MAGIC_DEBUG) != 0) 232 (void)fprintf(stderr, "softmagic %d\n", m); 233#ifdef BUILTIN_ELF 234 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && 235 nb > 5 && fd != -1) { 236 /* 237 * We matched something in the file, so this 238 * *might* be an ELF file, and the file is at 239 * least 5 bytes long, so if it's an ELF file 240 * it has at least one byte past the ELF magic 241 * number - try extracting information from the 242 * ELF headers that cannot easily * be 243 * extracted with rules in the magic file. 244 */ 245 if ((m = file_tryelf(ms, fd, ubuf, nb)) != 0) 246 if ((ms->flags & MAGIC_DEBUG) != 0) 247 (void)fprintf(stderr, 248 "elf %d\n", m); 249 } 250#endif 251#ifdef BUILTIN_MACHO 252 file_trymacho(ms, fd, ubuf, nb, inname); 253#endif 254 goto done; 255 } 256 257 /* try text properties (and possibly text tokens) */ 258 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) { 259 260 if ((m = file_ascmagic(ms, ubuf, nb)) != 0) { 261 if ((ms->flags & MAGIC_DEBUG) != 0) 262 (void)fprintf(stderr, "ascmagic %d\n", m); 263 goto done; 264 } 265 266 /* try to discover text encoding */ 267 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) { 268 if (looks_text == 0) 269 if ((m = file_ascmagic_with_encoding( ms, ubuf, 270 nb, u8buf, ulen, code, type)) != 0) { 271 if ((ms->flags & MAGIC_DEBUG) != 0) 272 (void)fprintf(stderr, 273 "ascmagic/enc %d\n", m); 274 goto done; 275 } 276 } 277 } 278 279 /* give up */ 280 m = 1; 281 if ((!mime || (mime & MAGIC_MIME_TYPE)) && 282 file_printf(ms, mime ? "application/octet-stream" : "data") == -1) { 283 rv = -1; 284 } 285 done: 286 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) { 287 if (ms->flags & MAGIC_MIME_TYPE) 288 if (file_printf(ms, "; charset=") == -1) 289 rv = -1; 290 if (file_printf(ms, "%s", code_mime) == -1) 291 rv = -1; 292 } 293 if (u8buf) 294 free(u8buf); 295 if (rv) 296 return rv; 297 298 return m; 299} 300#endif 301 302protected int 303file_reset(struct magic_set *ms) 304{ 305 if (ms->mlist == NULL) { 306 file_error(ms, 0, "no magic files loaded"); 307 return -1; 308 } 309 if (ms->o.buf) { 310 free(ms->o.buf); 311 ms->o.buf = NULL; 312 } 313 if (ms->o.pbuf) { 314 free(ms->o.pbuf); 315 ms->o.pbuf = NULL; 316 } 317 ms->event_flags &= ~EVENT_HAD_ERR; 318 ms->error = -1; 319 return 0; 320} 321 322#define OCTALIFY(n, o) \ 323 /*LINTED*/ \ 324 (void)(*(n)++ = '\\', \ 325 *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \ 326 *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \ 327 *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \ 328 (o)++) 329 330protected const char * 331file_getbuffer(struct magic_set *ms) 332{ 333 char *pbuf, *op, *np; 334 size_t psize, len; 335 336 if (ms->event_flags & EVENT_HAD_ERR) 337 return NULL; 338 339 if (ms->flags & MAGIC_RAW) 340 return ms->o.buf; 341 342 if (ms->o.buf == NULL) 343 return NULL; 344 345 /* * 4 is for octal representation, + 1 is for NUL */ 346 len = strlen(ms->o.buf); 347 if (len > (SIZE_MAX - 1) / 4) { 348 file_oomem(ms, len); 349 return NULL; 350 } 351 psize = len * 4 + 1; 352 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) { 353 file_oomem(ms, psize); 354 return NULL; 355 } 356 ms->o.pbuf = pbuf; 357 358#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) 359 { 360 mbstate_t state; 361 wchar_t nextchar; 362 int mb_conv = 1; 363 size_t bytesconsumed; 364 char *eop; 365 (void)memset(&state, 0, sizeof(mbstate_t)); 366 367 np = ms->o.pbuf; 368 op = ms->o.buf; 369 eop = op + len; 370 371 while (op < eop) { 372 bytesconsumed = mbrtowc(&nextchar, op, 373 (size_t)(eop - op), &state); 374 if (bytesconsumed == (size_t)(-1) || 375 bytesconsumed == (size_t)(-2)) { 376 mb_conv = 0; 377 break; 378 } 379 380 if (iswprint(nextchar) || nextchar == L'\t' || nextchar == L'\n') { 381 (void)memcpy(np, op, bytesconsumed); 382 op += bytesconsumed; 383 np += bytesconsumed; 384 } else { 385 while (bytesconsumed-- > 0) 386 OCTALIFY(np, op); 387 } 388 } 389 *np = '\0'; 390 391 /* Parsing succeeded as a multi-byte sequence */ 392 if (mb_conv != 0) 393 return ms->o.pbuf; 394 } 395#endif 396 397 for (np = ms->o.pbuf, op = ms->o.buf; *op; op++) { 398 if (isprint((unsigned char)*op) || ((unsigned char)*op) == '\t' || ((unsigned char)*op) == '\n') { 399 *np++ = *op; 400 } else { 401 OCTALIFY(np, op); 402 } 403 } 404 *np = '\0'; 405 return ms->o.pbuf; 406} 407 408protected int 409file_check_mem(struct magic_set *ms, unsigned int level) 410{ 411 size_t len; 412 413 if (level >= ms->c.len) { 414 len = (ms->c.len += 20) * sizeof(*ms->c.li); 415 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ? 416 malloc(len) : 417 realloc(ms->c.li, len)); 418 if (ms->c.li == NULL) { 419 file_oomem(ms, len); 420 return -1; 421 } 422 } 423 ms->c.li[level].got_match = 0; 424#ifdef ENABLE_CONDITIONALS 425 ms->c.li[level].last_match = 0; 426 ms->c.li[level].last_cond = COND_NONE; 427#endif /* ENABLE_CONDITIONALS */ 428 return 0; 429} 430