pcap-linux.c revision 98530
1/* 2 * pcap-linux.c: Packet capture interface to the Linux kernel 3 * 4 * Copyright (c) 2000 Torsten Landschoff <torsten@debian.org> 5 * Sebastian Krahmer <krahmer@cs.uni-potsdam.de> 6 * 7 * License: BSD 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 3. The names of the authors may not be used to endorse or promote 20 * products derived from this software without specific prior 21 * written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 26 */ 27#ifndef lint 28static const char rcsid[] = 29 "@(#) $Header: /tcpdump/master/libpcap/pcap-linux.c,v 1.73 2001/12/10 07:14:16 guy Exp $ (LBL)"; 30#endif 31 32/* 33 * Known problems with 2.0[.x] kernels: 34 * 35 * - The loopback device gives every packet twice; on 2.2[.x] kernels, 36 * if we use PF_PACKET, we can filter out the transmitted version 37 * of the packet by using data in the "sockaddr_ll" returned by 38 * "recvfrom()", but, on 2.0[.x] kernels, we have to use 39 * PF_INET/SOCK_PACKET, which means "recvfrom()" supplies a 40 * "sockaddr_pkt" which doesn't give us enough information to let 41 * us do that. 42 * 43 * - We have to set the interface's IFF_PROMISC flag ourselves, if 44 * we're to run in promiscuous mode, which means we have to turn 45 * it off ourselves when we're done; the kernel doesn't keep track 46 * of how many sockets are listening promiscuously, which means 47 * it won't get turned off automatically when no sockets are 48 * listening promiscuously. We catch "pcap_close()" and, for 49 * interfaces we put into promiscuous mode, take them out of 50 * promiscuous mode - which isn't necessarily the right thing to 51 * do, if another socket also requested promiscuous mode between 52 * the time when we opened the socket and the time when we close 53 * the socket. 54 * 55 * - MSG_TRUNC isn't supported, so you can't specify that "recvfrom()" 56 * return the amount of data that you could have read, rather than 57 * the amount that was returned, so we can't just allocate a buffer 58 * whose size is the snapshot length and pass the snapshot length 59 * as the byte count, and also pass MSG_TRUNC, so that the return 60 * value tells us how long the packet was on the wire. 61 * 62 * This means that, if we want to get the actual size of the packet, 63 * so we can return it in the "len" field of the packet header, 64 * we have to read the entire packet, not just the part that fits 65 * within the snapshot length, and thus waste CPU time copying data 66 * from the kernel that our caller won't see. 67 * 68 * We have to get the actual size, and supply it in "len", because 69 * otherwise, the IP dissector in tcpdump, for example, will complain 70 * about "truncated-ip", as the packet will appear to have been 71 * shorter, on the wire, than the IP header said it should have been. 72 */ 73 74 75#ifdef HAVE_CONFIG_H 76#include "config.h" 77#endif 78 79#include "pcap-int.h" 80#include "sll.h" 81 82#include <errno.h> 83#include <stdlib.h> 84#include <unistd.h> 85#include <fcntl.h> 86#include <string.h> 87#include <sys/socket.h> 88#include <sys/ioctl.h> 89#include <sys/utsname.h> 90#include <net/if.h> 91#include <netinet/in.h> 92#include <linux/if_ether.h> 93#include <net/if_arp.h> 94 95/* 96 * If PF_PACKET is defined, we can use {SOCK_RAW,SOCK_DGRAM}/PF_PACKET 97 * sockets rather than SOCK_PACKET sockets. 98 * 99 * To use them, we include <linux/if_packet.h> rather than 100 * <netpacket/packet.h>; we do so because 101 * 102 * some Linux distributions (e.g., Slackware 4.0) have 2.2 or 103 * later kernels and libc5, and don't provide a <netpacket/packet.h> 104 * file; 105 * 106 * not all versions of glibc2 have a <netpacket/packet.h> file 107 * that defines stuff needed for some of the 2.4-or-later-kernel 108 * features, so if the system has a 2.4 or later kernel, we 109 * still can't use those features. 110 * 111 * We're already including a number of other <linux/XXX.h> headers, and 112 * this code is Linux-specific (no other OS has PF_PACKET sockets as 113 * a raw packet capture mechanism), so it's not as if you gain any 114 * useful portability by using <netpacket/packet.h> 115 * 116 * XXX - should we just include <linux/if_packet.h> even if PF_PACKET 117 * isn't defined? It only defines one data structure in 2.0.x, so 118 * it shouldn't cause any problems. 119 */ 120#ifdef PF_PACKET 121# include <linux/if_packet.h> 122 123 /* 124 * On at least some Linux distributions (for example, Red Hat 5.2), 125 * there's no <netpacket/packet.h> file, but PF_PACKET is defined if 126 * you include <sys/socket.h>, but <linux/if_packet.h> doesn't define 127 * any of the PF_PACKET stuff such as "struct sockaddr_ll" or any of 128 * the PACKET_xxx stuff. 129 * 130 * So we check whether PACKET_HOST is defined, and assume that we have 131 * PF_PACKET sockets only if it is defined. 132 */ 133# ifdef PACKET_HOST 134# define HAVE_PF_PACKET_SOCKETS 135# endif /* PACKET_HOST */ 136#endif /* PF_PACKET */ 137 138#ifdef SO_ATTACH_FILTER 139#include <linux/types.h> 140#include <linux/filter.h> 141#endif 142 143#ifndef __GLIBC__ 144typedef int socklen_t; 145#endif 146 147#ifndef MSG_TRUNC 148/* 149 * This is being compiled on a system that lacks MSG_TRUNC; define it 150 * with the value it has in the 2.2 and later kernels, so that, on 151 * those kernels, when we pass it in the flags argument to "recvfrom()" 152 * we're passing the right value and thus get the MSG_TRUNC behavior 153 * we want. (We don't get that behavior on 2.0[.x] kernels, because 154 * they didn't support MSG_TRUNC.) 155 */ 156#define MSG_TRUNC 0x20 157#endif 158 159#define MAX_LINKHEADER_SIZE 256 160 161/* 162 * When capturing on all interfaces we use this as the buffer size. 163 * Should be bigger then all MTUs that occur in real life. 164 * 64kB should be enough for now. 165 */ 166#define BIGGER_THAN_ALL_MTUS (64*1024) 167 168/* 169 * Prototypes for internal functions 170 */ 171static void map_arphrd_to_dlt(pcap_t *, int); 172static int live_open_old(pcap_t *, char *, int, int, char *); 173static int live_open_new(pcap_t *, char *, int, int, char *); 174static int pcap_read_packet(pcap_t *, pcap_handler, u_char *); 175 176/* 177 * Wrap some ioctl calls 178 */ 179#ifdef HAVE_PF_PACKET_SOCKETS 180static int iface_get_id(int fd, const char *device, char *ebuf); 181#endif 182static int iface_get_mtu(int fd, const char *device, char *ebuf); 183static int iface_get_arptype(int fd, const char *device, char *ebuf); 184#ifdef HAVE_PF_PACKET_SOCKETS 185static int iface_bind(int fd, int ifindex, char *ebuf); 186#endif 187static int iface_bind_old(int fd, const char *device, char *ebuf); 188 189#ifdef SO_ATTACH_FILTER 190static int fix_program(pcap_t *handle, struct sock_fprog *fcode); 191static int fix_offset(struct bpf_insn *p); 192static int set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode); 193static int reset_kernel_filter(pcap_t *handle); 194 195static struct sock_filter total_insn 196 = BPF_STMT(BPF_RET | BPF_K, 0); 197static struct sock_fprog total_fcode 198 = { 1, &total_insn }; 199#endif 200 201/* 202 * Get a handle for a live capture from the given device. You can 203 * pass NULL as device to get all packages (without link level 204 * information of course). If you pass 1 as promisc the interface 205 * will be set to promiscous mode (XXX: I think this usage should 206 * be deprecated and functions be added to select that later allow 207 * modification of that values -- Torsten). 208 * 209 * See also pcap(3). 210 */ 211pcap_t * 212pcap_open_live(char *device, int snaplen, int promisc, int to_ms, char *ebuf) 213{ 214 pcap_t *handle; 215 int mtu; 216 struct utsname utsname; 217 218 /* Allocate a handle for this session. */ 219 220 handle = malloc(sizeof(*handle)); 221 if (handle == NULL) { 222 snprintf(ebuf, PCAP_ERRBUF_SIZE, "malloc: %s", 223 pcap_strerror(errno)); 224 return NULL; 225 } 226 227 /* Initialize some components of the pcap structure. */ 228 229 memset(handle, 0, sizeof(*handle)); 230 handle->snapshot = snaplen; 231 handle->md.timeout = to_ms; 232 233 /* 234 * NULL and "any" are special devices which give us the hint to 235 * monitor all devices. 236 */ 237 if (!device || strcmp(device, "any") == 0) { 238 device = NULL; 239 handle->md.device = strdup("any"); 240 if (promisc) { 241 promisc = 0; 242 /* Just a warning. */ 243 snprintf(ebuf, PCAP_ERRBUF_SIZE, 244 "Promiscuous mode not supported on the \"any\" device"); 245 } 246 247 } else 248 handle->md.device = strdup(device); 249 250 if (handle->md.device == NULL) { 251 snprintf(ebuf, PCAP_ERRBUF_SIZE, "strdup: %s", 252 pcap_strerror(errno) ); 253 free(handle); 254 return NULL; 255 } 256 257 /* 258 * Current Linux kernels use the protocol family PF_PACKET to 259 * allow direct access to all packets on the network while 260 * older kernels had a special socket type SOCK_PACKET to 261 * implement this feature. 262 * While this old implementation is kind of obsolete we need 263 * to be compatible with older kernels for a while so we are 264 * trying both methods with the newer method preferred. 265 */ 266 267 if (! (live_open_new(handle, device, promisc, to_ms, ebuf) || 268 live_open_old(handle, device, promisc, to_ms, ebuf)) ) 269 { 270 /* 271 * Both methods to open the packet socket failed. Tidy 272 * up and report our failure (ebuf is expected to be 273 * set by the functions above). 274 */ 275 276 free(handle->md.device); 277 free(handle); 278 return NULL; 279 } 280 281 /* 282 * Compute the buffer size. 283 * 284 * If we're using SOCK_PACKET, this might be a 2.0[.x] kernel, 285 * and might require special handling - check. 286 */ 287 if (handle->md.sock_packet && (uname(&utsname) < 0 || 288 strncmp(utsname.release, "2.0", 3) == 0)) { 289 /* 290 * We're using a SOCK_PACKET structure, and either 291 * we couldn't find out what kernel release this is, 292 * or it's a 2.0[.x] kernel. 293 * 294 * In the 2.0[.x] kernel, a "recvfrom()" on 295 * a SOCK_PACKET socket, with MSG_TRUNC set, will 296 * return the number of bytes read, so if we pass 297 * a length based on the snapshot length, it'll 298 * return the number of bytes from the packet 299 * copied to userland, not the actual length 300 * of the packet. 301 * 302 * This means that, for example, the IP dissector 303 * in tcpdump will get handed a packet length less 304 * than the length in the IP header, and will 305 * complain about "truncated-ip". 306 * 307 * So we don't bother trying to copy from the 308 * kernel only the bytes in which we're interested, 309 * but instead copy them all, just as the older 310 * versions of libpcap for Linux did. 311 * 312 * The buffer therefore needs to be big enough to 313 * hold the largest packet we can get from this 314 * device. Unfortunately, we can't get the MRU 315 * of the network; we can only get the MTU. The 316 * MTU may be too small, in which case a packet larger 317 * than the buffer size will be truncated *and* we 318 * won't get the actual packet size. 319 * 320 * However, if the snapshot length is larger than 321 * the buffer size based on the MTU, we use the 322 * snapshot length as the buffer size, instead; 323 * this means that with a sufficiently large snapshot 324 * length we won't artificially truncate packets 325 * to the MTU-based size. 326 * 327 * This mess just one of many problems with packet 328 * capture on 2.0[.x] kernels; you really want a 329 * 2.2[.x] or later kernel if you want packet capture 330 * to work well. 331 */ 332 mtu = iface_get_mtu(handle->fd, device, ebuf); 333 if (mtu == -1) { 334 close(handle->fd); 335 free(handle->md.device); 336 free(handle); 337 return NULL; 338 } 339 handle->bufsize = MAX_LINKHEADER_SIZE + mtu; 340 if (handle->bufsize < handle->snapshot) 341 handle->bufsize = handle->snapshot; 342 } else { 343 /* 344 * This is a 2.2[.x] or later kernel (we know that 345 * either because we're not using a SOCK_PACKET 346 * socket - PF_PACKET is supported only in 2.2 347 * and later kernels - or because we checked the 348 * kernel version). 349 * 350 * We can safely pass "recvfrom()" a byte count 351 * based on the snapshot length. 352 */ 353 handle->bufsize = handle->snapshot; 354 } 355 356 /* Allocate the buffer */ 357 358 handle->buffer = malloc(handle->bufsize + handle->offset); 359 if (!handle->buffer) { 360 snprintf(ebuf, PCAP_ERRBUF_SIZE, 361 "malloc: %s", pcap_strerror(errno)); 362 close(handle->fd); 363 free(handle->md.device); 364 free(handle); 365 return NULL; 366 } 367 368 return handle; 369} 370 371/* 372 * Read at most max_packets from the capture stream and call the callback 373 * for each of them. Returns the number of packets handled or -1 if an 374 * error occured. 375 */ 376int 377pcap_read(pcap_t *handle, int max_packets, pcap_handler callback, u_char *user) 378{ 379 /* 380 * Currently, on Linux only one packet is delivered per read, 381 * so we don't loop. 382 */ 383 return pcap_read_packet(handle, callback, user); 384} 385 386/* 387 * Read a packet from the socket calling the handler provided by 388 * the user. Returns the number of packets received or -1 if an 389 * error occured. 390 */ 391static int 392pcap_read_packet(pcap_t *handle, pcap_handler callback, u_char *userdata) 393{ 394 u_char *bp; 395 int offset; 396#ifdef HAVE_PF_PACKET_SOCKETS 397 struct sockaddr_ll from; 398 struct sll_header *hdrp; 399#else 400 struct sockaddr from; 401#endif 402 socklen_t fromlen; 403 int packet_len, caplen; 404 struct pcap_pkthdr pcap_header; 405 406#ifdef HAVE_PF_PACKET_SOCKETS 407 /* 408 * If this is a cooked device, leave extra room for a 409 * fake packet header. 410 */ 411 if (handle->md.cooked) 412 offset = SLL_HDR_LEN; 413 else 414 offset = 0; 415#else 416 /* 417 * This system doesn't have PF_PACKET sockets, so it doesn't 418 * support cooked devices. 419 */ 420 offset = 0; 421#endif 422 423 /* Receive a single packet from the kernel */ 424 425 bp = handle->buffer + handle->offset; 426 do { 427 fromlen = sizeof(from); 428 packet_len = recvfrom( 429 handle->fd, bp + offset, 430 handle->bufsize - offset, MSG_TRUNC, 431 (struct sockaddr *) &from, &fromlen); 432 } while (packet_len == -1 && errno == EINTR); 433 434 /* Check if an error occured */ 435 436 if (packet_len == -1) { 437 if (errno == EAGAIN) 438 return 0; /* no packet there */ 439 else { 440 snprintf(handle->errbuf, sizeof(handle->errbuf), 441 "recvfrom: %s", pcap_strerror(errno)); 442 return -1; 443 } 444 } 445 446#ifdef HAVE_PF_PACKET_SOCKETS 447 /* 448 * If this is from the loopback device, reject outgoing packets; 449 * we'll see the packet as an incoming packet as well, and 450 * we don't want to see it twice. 451 * 452 * We can only do this if we're using PF_PACKET; the address 453 * returned for SOCK_PACKET is a "sockaddr_pkt" which lacks 454 * the relevant packet type information. 455 */ 456 if (!handle->md.sock_packet && 457 from.sll_ifindex == handle->md.lo_ifindex && 458 from.sll_pkttype == PACKET_OUTGOING) 459 return 0; 460#endif 461 462#ifdef HAVE_PF_PACKET_SOCKETS 463 /* 464 * If this is a cooked device, fill in the fake packet header. 465 */ 466 if (handle->md.cooked) { 467 /* 468 * Add the length of the fake header to the length 469 * of packet data we read. 470 */ 471 packet_len += SLL_HDR_LEN; 472 473 hdrp = (struct sll_header *)bp; 474 475 /* 476 * Map the PACKET_ value to a LINUX_SLL_ value; we 477 * want the same numerical value to be used in 478 * the link-layer header even if the numerical values 479 * for the PACKET_ #defines change, so that programs 480 * that look at the packet type field will always be 481 * able to handle DLT_LINUX_SLL captures. 482 */ 483 switch (from.sll_pkttype) { 484 485 case PACKET_HOST: 486 hdrp->sll_pkttype = htons(LINUX_SLL_HOST); 487 break; 488 489 case PACKET_BROADCAST: 490 hdrp->sll_pkttype = htons(LINUX_SLL_BROADCAST); 491 break; 492 493 case PACKET_MULTICAST: 494 hdrp->sll_pkttype = htons(LINUX_SLL_MULTICAST); 495 break; 496 497 case PACKET_OTHERHOST: 498 hdrp->sll_pkttype = htons(LINUX_SLL_OTHERHOST); 499 break; 500 501 case PACKET_OUTGOING: 502 hdrp->sll_pkttype = htons(LINUX_SLL_OUTGOING); 503 break; 504 505 default: 506 hdrp->sll_pkttype = -1; 507 break; 508 } 509 510 hdrp->sll_hatype = htons(from.sll_hatype); 511 hdrp->sll_halen = htons(from.sll_halen); 512 memcpy(hdrp->sll_addr, from.sll_addr, 513 (from.sll_halen > SLL_ADDRLEN) ? 514 SLL_ADDRLEN : 515 from.sll_halen); 516 hdrp->sll_protocol = from.sll_protocol; 517 } 518#endif 519 520 /* 521 * XXX: According to the kernel source we should get the real 522 * packet len if calling recvfrom with MSG_TRUNC set. It does 523 * not seem to work here :(, but it is supported by this code 524 * anyway. 525 * To be honest the code RELIES on that feature so this is really 526 * broken with 2.2.x kernels. 527 * I spend a day to figure out what's going on and I found out 528 * that the following is happening: 529 * 530 * The packet comes from a random interface and the packet_rcv 531 * hook is called with a clone of the packet. That code inserts 532 * the packet into the receive queue of the packet socket. 533 * If a filter is attached to that socket that filter is run 534 * first - and there lies the problem. The default filter always 535 * cuts the packet at the snaplen: 536 * 537 * # tcpdump -d 538 * (000) ret #68 539 * 540 * So the packet filter cuts down the packet. The recvfrom call 541 * says "hey, it's only 68 bytes, it fits into the buffer" with 542 * the result that we don't get the real packet length. This 543 * is valid at least until kernel 2.2.17pre6. 544 * 545 * We currently handle this by making a copy of the filter 546 * program, fixing all "ret" instructions with non-zero 547 * operands to have an operand of 65535 so that the filter 548 * doesn't truncate the packet, and supplying that modified 549 * filter to the kernel. 550 */ 551 552 caplen = packet_len; 553 if (caplen > handle->snapshot) 554 caplen = handle->snapshot; 555 556 /* Run the packet filter if not using kernel filter */ 557 if (!handle->md.use_bpf && handle->fcode.bf_insns) { 558 if (bpf_filter(handle->fcode.bf_insns, bp, 559 packet_len, caplen) == 0) 560 { 561 /* rejected by filter */ 562 return 0; 563 } 564 } 565 566 /* Fill in our own header data */ 567 568 if (ioctl(handle->fd, SIOCGSTAMP, &pcap_header.ts) == -1) { 569 snprintf(handle->errbuf, sizeof(handle->errbuf), 570 "ioctl: %s", pcap_strerror(errno)); 571 return -1; 572 } 573 pcap_header.caplen = caplen; 574 pcap_header.len = packet_len; 575 576 /* 577 * Count the packet. 578 * 579 * Arguably, we should count them before we check the filter, 580 * as on many other platforms "ps_recv" counts packets 581 * handed to the filter rather than packets that passed 582 * the filter, but if filtering is done in the kernel, we 583 * can't get a count of packets that passed the filter, 584 * and that would mean the meaning of "ps_recv" wouldn't 585 * be the same on all Linux systems. 586 * 587 * XXX - it's not the same on all systems in any case; 588 * ideally, we should have a "get the statistics" call 589 * that supplies more counts and indicates which of them 590 * it supplies, so that we supply a count of packets 591 * handed to the filter only on platforms where that 592 * information is available. 593 * 594 * We count them here even if we can get the packet count 595 * from the kernel, as we can only determine at run time 596 * whether we'll be able to get it from the kernel (if 597 * HAVE_TPACKET_STATS isn't defined, we can't get it from 598 * the kernel, but if it is defined, the library might 599 * have been built with a 2.4 or later kernel, but we 600 * might be running on a 2.2[.x] kernel without Alexey 601 * Kuznetzov's turbopacket patches, and thus the kernel 602 * might not be able to supply those statistics). We 603 * could, I guess, try, when opening the socket, to get 604 * the statistics, and if we can not increment the count 605 * here, but it's not clear that always incrementing 606 * the count is more expensive than always testing a flag 607 * in memory. 608 */ 609 handle->md.stat.ps_recv++; 610 611 /* Call the user supplied callback function */ 612 callback(userdata, &pcap_header, bp); 613 614 return 1; 615} 616 617/* 618 * Get the statistics for the given packet capture handle. 619 * Reports the number of dropped packets iff the kernel supports 620 * the PACKET_STATISTICS "getsockopt()" argument (2.4 and later 621 * kernels, and 2.2[.x] kernels with Alexey Kuznetzov's turbopacket 622 * patches); otherwise, that information isn't available, and we lie 623 * and report 0 as the count of dropped packets. 624 */ 625int 626pcap_stats(pcap_t *handle, struct pcap_stat *stats) 627{ 628#ifdef HAVE_TPACKET_STATS 629 struct tpacket_stats kstats; 630 socklen_t len = sizeof (struct tpacket_stats); 631 632 /* 633 * Try to get the packet counts from the kernel. 634 */ 635 if (getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, 636 &kstats, &len) > -1) { 637 /* 638 * In "linux/net/packet/af_packet.c", at least in the 639 * 2.4.9 kernel, "tp_packets" is incremented for every 640 * packet that passes the packet filter *and* is 641 * successfully queued on the socket; "tp_drops" is 642 * incremented for every packet dropped because there's 643 * not enough free space in the socket buffer. 644 * 645 * When the statistics are returned for a PACKET_STATISTICS 646 * "getsockopt()" call, "tp_drops" is added to "tp_packets", 647 * so that "tp_packets" counts all packets handed to 648 * the PF_PACKET socket, including packets dropped because 649 * there wasn't room on the socket buffer - but not 650 * including packets that didn't pass the filter. 651 * 652 * In the BSD BPF, the count of received packets is 653 * incremented for every packet handed to BPF, regardless 654 * of whether it passed the filter. 655 * 656 * We can't make "pcap_stats()" work the same on both 657 * platforms, but the best approximation is to return 658 * "tp_packets" as the count of packets and "tp_drops" 659 * as the count of drops. 660 */ 661 handle->md.stat.ps_recv = kstats.tp_packets; 662 handle->md.stat.ps_drop = kstats.tp_drops; 663 } 664 else 665 { 666 /* 667 * If the error was EOPNOTSUPP, fall through, so that 668 * if you build the library on a system with 669 * "struct tpacket_stats" and run it on a system 670 * that doesn't, it works as it does if the library 671 * is built on a system without "struct tpacket_stats". 672 */ 673 if (errno != EOPNOTSUPP) { 674 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, 675 "pcap_stats: %s", pcap_strerror(errno)); 676 return -1; 677 } 678 } 679#endif 680 /* 681 * On systems where the PACKET_STATISTICS "getsockopt()" argument 682 * is supported on PF_PACKET sockets: 683 * 684 * "ps_recv" counts only packets that *passed* the filter, 685 * not packets that didn't pass the filter. This includes 686 * packets later dropped because we ran out of buffer space. 687 * 688 * "ps_drop" counts packets dropped because we ran out of 689 * buffer space. It doesn't count packets dropped by the 690 * interface driver. It counts only packets that passed 691 * the filter. 692 * 693 * Both statistics include packets not yet read from the 694 * kernel by libpcap, and thus not yet seen by the application. 695 * 696 * On systems where the PACKET_STATISTICS "getsockopt()" argument 697 * is not supported on PF_PACKET sockets: 698 * 699 * "ps_recv" counts only packets that *passed* the filter, 700 * not packets that didn't pass the filter. It does not 701 * count packets dropped because we ran out of buffer 702 * space. 703 * 704 * "ps_drop" is not supported. 705 * 706 * "ps_recv" doesn't include packets not yet read from 707 * the kernel by libpcap. 708 */ 709 *stats = handle->md.stat; 710 return 0; 711} 712 713/* 714 * Attach the given BPF code to the packet capture device. 715 */ 716int 717pcap_setfilter(pcap_t *handle, struct bpf_program *filter) 718{ 719#ifdef SO_ATTACH_FILTER 720 struct sock_fprog fcode; 721 int can_filter_in_kernel; 722#endif 723 724 if (!handle) 725 return -1; 726 if (!filter) { 727 strncpy(handle->errbuf, "setfilter: No filter specified", 728 sizeof(handle->errbuf)); 729 return -1; 730 } 731 732 /* Make our private copy of the filter */ 733 734 if (install_bpf_program(handle, filter) < 0) { 735 snprintf(handle->errbuf, sizeof(handle->errbuf), 736 "malloc: %s", pcap_strerror(errno)); 737 return -1; 738 } 739 740 /* 741 * Run user level packet filter by default. Will be overriden if 742 * installing a kernel filter succeeds. 743 */ 744 handle->md.use_bpf = 0; 745 746 /* 747 * If we're reading from a savefile, don't try to install 748 * a kernel filter. 749 */ 750 if (handle->sf.rfile != NULL) 751 return 0; 752 753 /* Install kernel level filter if possible */ 754 755#ifdef SO_ATTACH_FILTER 756#ifdef USHRT_MAX 757 if (handle->fcode.bf_len > USHRT_MAX) { 758 /* 759 * fcode.len is an unsigned short for current kernel. 760 * I have yet to see BPF-Code with that much 761 * instructions but still it is possible. So for the 762 * sake of correctness I added this check. 763 */ 764 fprintf(stderr, "Warning: Filter too complex for kernel\n"); 765 fcode.filter = NULL; 766 can_filter_in_kernel = 0; 767 } else 768#endif /* USHRT_MAX */ 769 { 770 /* 771 * Oh joy, the Linux kernel uses struct sock_fprog instead 772 * of struct bpf_program and of course the length field is 773 * of different size. Pointed out by Sebastian 774 * 775 * Oh, and we also need to fix it up so that all "ret" 776 * instructions with non-zero operands have 65535 as the 777 * operand, and so that, if we're in cooked mode, all 778 * memory-reference instructions use special magic offsets 779 * in references to the link-layer header and assume that 780 * the link-layer payload begins at 0; "fix_program()" 781 * will do that. 782 */ 783 switch (fix_program(handle, &fcode)) { 784 785 case -1: 786 default: 787 /* 788 * Fatal error; just quit. 789 * (The "default" case shouldn't happen; we 790 * return -1 for that reason.) 791 */ 792 return -1; 793 794 case 0: 795 /* 796 * The program performed checks that we can't make 797 * work in the kernel. 798 */ 799 can_filter_in_kernel = 0; 800 break; 801 802 case 1: 803 /* 804 * We have a filter that'll work in the kernel. 805 */ 806 can_filter_in_kernel = 1; 807 break; 808 } 809 } 810 811 if (can_filter_in_kernel) { 812 if (set_kernel_filter(handle, &fcode) == 0) 813 { 814 /* Installation succeded - using kernel filter. */ 815 handle->md.use_bpf = 1; 816 } 817 else 818 { 819 /* 820 * Print a warning if we weren't able to install 821 * the filter for a reason other than "this kernel 822 * isn't configured to support socket filters. 823 */ 824 if (errno != ENOPROTOOPT && errno != EOPNOTSUPP) { 825 fprintf(stderr, 826 "Warning: Kernel filter failed: %s\n", 827 pcap_strerror(errno)); 828 } 829 } 830 } 831 832 /* 833 * If we're not using the kernel filter, get rid of any kernel 834 * filter that might've been there before, e.g. because the 835 * previous filter could work in the kernel, or because some other 836 * code attached a filter to the socket by some means other than 837 * calling "pcap_setfilter()". Otherwise, the kernel filter may 838 * filter out packets that would pass the new userland filter. 839 */ 840 if (!handle->md.use_bpf) 841 reset_kernel_filter(handle); 842 843 /* 844 * Free up the copy of the filter that was made by "fix_program()". 845 */ 846 if (fcode.filter != NULL) 847 free(fcode.filter); 848#endif /* SO_ATTACH_FILTER */ 849 850 return 0; 851} 852 853/* 854 * Linux uses the ARP hardware type to identify the type of an 855 * interface. pcap uses the DLT_xxx constants for this. This 856 * function takes a pointer to a "pcap_t", and an ARPHRD_xxx 857 * constant, as arguments, and sets "handle->linktype" to the 858 * appropriate DLT_XXX constant and sets "handle->offset" to 859 * the appropriate value (to make "handle->offset" plus link-layer 860 * header length be a multiple of 4, so that the link-layer payload 861 * will be aligned on a 4-byte boundary when capturing packets). 862 * (If the offset isn't set here, it'll be 0; add code as appropriate 863 * for cases where it shouldn't be 0.) 864 * 865 * Sets the link type to -1 if unable to map the type. 866 */ 867static void map_arphrd_to_dlt(pcap_t *handle, int arptype) 868{ 869 switch (arptype) { 870 871 case ARPHRD_ETHER: 872 case ARPHRD_METRICOM: 873 case ARPHRD_LOOPBACK: 874 handle->linktype = DLT_EN10MB; 875 handle->offset = 2; 876 break; 877 878 case ARPHRD_EETHER: 879 handle->linktype = DLT_EN3MB; 880 break; 881 882 case ARPHRD_AX25: 883 handle->linktype = DLT_AX25; 884 break; 885 886 case ARPHRD_PRONET: 887 handle->linktype = DLT_PRONET; 888 break; 889 890 case ARPHRD_CHAOS: 891 handle->linktype = DLT_CHAOS; 892 break; 893 894#ifndef ARPHRD_IEEE802_TR 895#define ARPHRD_IEEE802_TR 800 /* From Linux 2.4 */ 896#endif 897 case ARPHRD_IEEE802_TR: 898 case ARPHRD_IEEE802: 899 handle->linktype = DLT_IEEE802; 900 handle->offset = 2; 901 break; 902 903 case ARPHRD_ARCNET: 904 handle->linktype = DLT_ARCNET; 905 break; 906 907 case ARPHRD_FDDI: 908 handle->linktype = DLT_FDDI; 909 handle->offset = 3; 910 break; 911 912#ifndef ARPHRD_ATM /* FIXME: How to #include this? */ 913#define ARPHRD_ATM 19 914#endif 915 case ARPHRD_ATM: 916 /* 917 * The Classical IP implementation in ATM for Linux 918 * supports both what RFC 1483 calls "LLC Encapsulation", 919 * in which each packet has an LLC header, possibly 920 * with a SNAP header as well, prepended to it, and 921 * what RFC 1483 calls "VC Based Multiplexing", in which 922 * different virtual circuits carry different network 923 * layer protocols, and no header is prepended to packets. 924 * 925 * They both have an ARPHRD_ type of ARPHRD_ATM, so 926 * you can't use the ARPHRD_ type to find out whether 927 * captured packets will have an LLC header, and, 928 * while there's a socket ioctl to *set* the encapsulation 929 * type, there's no ioctl to *get* the encapsulation type. 930 * 931 * This means that 932 * 933 * programs that dissect Linux Classical IP frames 934 * would have to check for an LLC header and, 935 * depending on whether they see one or not, dissect 936 * the frame as LLC-encapsulated or as raw IP (I 937 * don't know whether there's any traffic other than 938 * IP that would show up on the socket, or whether 939 * there's any support for IPv6 in the Linux 940 * Classical IP code); 941 * 942 * filter expressions would have to compile into 943 * code that checks for an LLC header and does 944 * the right thing. 945 * 946 * Both of those are a nuisance - and, at least on systems 947 * that support PF_PACKET sockets, we don't have to put 948 * up with those nuisances; instead, we can just capture 949 * in cooked mode. That's what we'll do. 950 */ 951 handle->linktype = DLT_LINUX_SLL; 952 break; 953 954#ifndef ARPHRD_IEEE80211 /* From Linux 2.4.6 */ 955#define ARPHRD_IEEE80211 801 956#endif 957 case ARPHRD_IEEE80211: 958 handle->linktype = DLT_IEEE802_11; 959 break; 960 961 case ARPHRD_PPP: 962 /* 963 * Some PPP code in the kernel supplies no link-layer 964 * header whatsoever to PF_PACKET sockets; other PPP 965 * code supplies PPP link-layer headers ("syncppp.c"); 966 * some PPP code might supply random link-layer 967 * headers (PPP over ISDN - there's code in Ethereal, 968 * for example, to cope with PPP-over-ISDN captures 969 * with which the Ethereal developers have had to cope, 970 * heuristically trying to determine which of the 971 * oddball link-layer headers particular packets have). 972 * 973 * As such, we just punt, and run all PPP interfaces 974 * in cooked mode. 975 */ 976 handle->linktype = DLT_LINUX_SLL; 977 break; 978 979 case ARPHRD_HDLC: 980 handle->linktype = DLT_C_HDLC; 981 break; 982 983 /* Not sure if this is correct for all tunnels, but it 984 * works for CIPE */ 985 case ARPHRD_TUNNEL: 986#ifndef ARPHRD_SIT 987#define ARPHRD_SIT 776 /* From Linux 2.2.14 */ 988#endif 989 case ARPHRD_SIT: 990 case ARPHRD_CSLIP: 991 case ARPHRD_SLIP6: 992 case ARPHRD_CSLIP6: 993 case ARPHRD_ADAPT: 994 case ARPHRD_SLIP: 995 /* 996 * XXX - should some of those be mapped to DLT_LINUX_SLL 997 * instead? Should we just map all of them to DLT_LINUX_SLL? 998 */ 999 handle->linktype = DLT_RAW; 1000 break; 1001 1002 case ARPHRD_LOCALTLK: 1003 handle->linktype = DLT_LTALK; 1004 break; 1005 1006 default: 1007 handle->linktype = -1; 1008 break; 1009 } 1010} 1011 1012/* ===== Functions to interface to the newer kernels ================== */ 1013 1014/* 1015 * Try to open a packet socket using the new kernel interface. 1016 * Returns 0 on failure. 1017 * FIXME: 0 uses to mean success (Sebastian) 1018 */ 1019static int 1020live_open_new(pcap_t *handle, char *device, int promisc, 1021 int to_ms, char *ebuf) 1022{ 1023#ifdef HAVE_PF_PACKET_SOCKETS 1024 int sock_fd = -1, device_id, arptype; 1025 struct packet_mreq mr; 1026 1027 /* One shot loop used for error handling - bail out with break */ 1028 1029 do { 1030 /* 1031 * Open a socket with protocol family packet. If a device is 1032 * given we try to open it in raw mode otherwise we use 1033 * the cooked interface. 1034 */ 1035 sock_fd = device ? 1036 socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL)) 1037 : socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL)); 1038 1039 if (sock_fd == -1) { 1040 snprintf(ebuf, PCAP_ERRBUF_SIZE, "socket: %s", 1041 pcap_strerror(errno) ); 1042 break; 1043 } 1044 1045 /* It seems the kernel supports the new interface. */ 1046 handle->md.sock_packet = 0; 1047 1048 /* 1049 * Get the interface index of the loopback device. 1050 * If the attempt fails, don't fail, just set the 1051 * "md.lo_ifindex" to -1. 1052 * 1053 * XXX - can there be more than one device that loops 1054 * packets back, i.e. devices other than "lo"? If so, 1055 * we'd need to find them all, and have an array of 1056 * indices for them, and check all of them in 1057 * "pcap_read_packet()". 1058 */ 1059 handle->md.lo_ifindex = iface_get_id(sock_fd, "lo", ebuf); 1060 1061 /* 1062 * Default value for offset to align link-layer payload 1063 * on a 4-byte boundary. 1064 */ 1065 handle->offset = 0; 1066 1067 /* 1068 * What kind of frames do we have to deal with? Fall back 1069 * to cooked mode if we have an unknown interface type. 1070 */ 1071 1072 if (device) { 1073 /* Assume for now we don't need cooked mode. */ 1074 handle->md.cooked = 0; 1075 1076 arptype = iface_get_arptype(sock_fd, device, ebuf); 1077 if (arptype == -1) 1078 break; 1079 map_arphrd_to_dlt(handle, arptype); 1080 if (handle->linktype == -1 || 1081 handle->linktype == DLT_LINUX_SLL || 1082 (handle->linktype == DLT_EN10MB && 1083 (strncmp("isdn", device, 4) == 0 || 1084 strncmp("isdY", device, 4) == 0))) { 1085 /* 1086 * Unknown interface type (-1), or a 1087 * device we explicitly chose to run 1088 * in cooked mode (e.g., PPP devices), 1089 * or an ISDN device (whose link-layer 1090 * type we can only determine by using 1091 * APIs that may be different on different 1092 * kernels) - reopen in cooked mode. 1093 */ 1094 if (close(sock_fd) == -1) { 1095 snprintf(ebuf, PCAP_ERRBUF_SIZE, 1096 "close: %s", pcap_strerror(errno)); 1097 break; 1098 } 1099 sock_fd = socket(PF_PACKET, SOCK_DGRAM, 1100 htons(ETH_P_ALL)); 1101 if (sock_fd == -1) { 1102 snprintf(ebuf, PCAP_ERRBUF_SIZE, 1103 "socket: %s", pcap_strerror(errno)); 1104 break; 1105 } 1106 handle->md.cooked = 1; 1107 1108 if (handle->linktype == -1) { 1109 /* 1110 * Warn that we're falling back on 1111 * cooked mode; we may want to 1112 * update "map_arphrd_to_dlt()" 1113 * to handle the new type. 1114 */ 1115 snprintf(ebuf, PCAP_ERRBUF_SIZE, 1116 "arptype %d not " 1117 "supported by libpcap - " 1118 "falling back to cooked " 1119 "socket", 1120 arptype); 1121 } 1122 handle->linktype = DLT_LINUX_SLL; 1123 } 1124 1125 device_id = iface_get_id(sock_fd, device, ebuf); 1126 if (device_id == -1) 1127 break; 1128 1129 if (iface_bind(sock_fd, device_id, ebuf) == -1) 1130 break; 1131 } else { 1132 /* 1133 * This is cooked mode. 1134 */ 1135 handle->md.cooked = 1; 1136 handle->linktype = DLT_LINUX_SLL; 1137 1138 /* 1139 * XXX - squelch GCC complaints about 1140 * uninitialized variables; if we can't 1141 * select promiscuous mode on all interfaces, 1142 * we should move the code below into the 1143 * "if (device)" branch of the "if" and 1144 * get rid of the next statement. 1145 */ 1146 device_id = -1; 1147 } 1148 1149 /* Select promiscuous mode on/off */ 1150 1151#ifdef SOL_PACKET 1152 /* 1153 * Hmm, how can we set promiscuous mode on all interfaces? 1154 * I am not sure if that is possible at all. 1155 */ 1156 1157 if (device) { 1158 memset(&mr, 0, sizeof(mr)); 1159 mr.mr_ifindex = device_id; 1160 mr.mr_type = promisc ? 1161 PACKET_MR_PROMISC : PACKET_MR_ALLMULTI; 1162 if (setsockopt(sock_fd, SOL_PACKET, 1163 PACKET_ADD_MEMBERSHIP, &mr, sizeof(mr)) == -1) 1164 { 1165 snprintf(ebuf, PCAP_ERRBUF_SIZE, 1166 "setsockopt: %s", pcap_strerror(errno)); 1167 break; 1168 } 1169 } 1170#endif 1171 1172 /* Save the socket FD in the pcap structure */ 1173 1174 handle->fd = sock_fd; 1175 1176 return 1; 1177 1178 } while(0); 1179 1180 if (sock_fd != -1) 1181 close(sock_fd); 1182 return 0; 1183#else 1184 strncpy(ebuf, 1185 "New packet capturing interface not supported by build " 1186 "environment", PCAP_ERRBUF_SIZE); 1187 return 0; 1188#endif 1189} 1190 1191#ifdef HAVE_PF_PACKET_SOCKETS 1192/* 1193 * Return the index of the given device name. Fill ebuf and return 1194 * -1 on failure. 1195 */ 1196static int 1197iface_get_id(int fd, const char *device, char *ebuf) 1198{ 1199 struct ifreq ifr; 1200 1201 memset(&ifr, 0, sizeof(ifr)); 1202 strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 1203 1204 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) { 1205 snprintf(ebuf, PCAP_ERRBUF_SIZE, 1206 "ioctl: %s", pcap_strerror(errno)); 1207 return -1; 1208 } 1209 1210 return ifr.ifr_ifindex; 1211} 1212 1213/* 1214 * Bind the socket associated with FD to the given device. 1215 */ 1216static int 1217iface_bind(int fd, int ifindex, char *ebuf) 1218{ 1219 struct sockaddr_ll sll; 1220 1221 memset(&sll, 0, sizeof(sll)); 1222 sll.sll_family = AF_PACKET; 1223 sll.sll_ifindex = ifindex; 1224 sll.sll_protocol = htons(ETH_P_ALL); 1225 1226 if (bind(fd, (struct sockaddr *) &sll, sizeof(sll)) == -1) { 1227 snprintf(ebuf, PCAP_ERRBUF_SIZE, 1228 "bind: %s", pcap_strerror(errno)); 1229 return -1; 1230 } 1231 1232 return 0; 1233} 1234 1235#endif 1236 1237 1238/* ===== Functions to interface to the older kernels ================== */ 1239 1240/* 1241 * With older kernels promiscuous mode is kind of interesting because we 1242 * have to reset the interface before exiting. The problem can't really 1243 * be solved without some daemon taking care of managing usage counts. 1244 * If we put the interface into promiscuous mode, we set a flag indicating 1245 * that we must take it out of that mode when the interface is closed, 1246 * and, when closing the interface, if that flag is set we take it out 1247 * of promiscuous mode. 1248 */ 1249 1250/* 1251 * List of pcaps for which we turned promiscuous mode on by hand. 1252 * If there are any such pcaps, we arrange to call "pcap_close_all()" 1253 * when we exit, and have it close all of them to turn promiscuous mode 1254 * off. 1255 */ 1256static struct pcap *pcaps_to_close; 1257 1258/* 1259 * TRUE if we've already called "atexit()" to cause "pcap_close_all()" to 1260 * be called on exit. 1261 */ 1262static int did_atexit; 1263 1264static void pcap_close_all(void) 1265{ 1266 struct pcap *handle; 1267 1268 while ((handle = pcaps_to_close) != NULL) 1269 pcap_close(handle); 1270} 1271 1272void pcap_close_linux( pcap_t *handle ) 1273{ 1274 struct pcap *p, *prevp; 1275 struct ifreq ifr; 1276 1277 if (handle->md.clear_promisc) { 1278 /* 1279 * We put the interface into promiscuous mode; take 1280 * it out of promiscuous mode. 1281 * 1282 * XXX - if somebody else wants it in promiscuous mode, 1283 * this code cannot know that, so it'll take it out 1284 * of promiscuous mode. That's not fixable in 2.0[.x] 1285 * kernels. 1286 */ 1287 memset(&ifr, 0, sizeof(ifr)); 1288 strncpy(ifr.ifr_name, handle->md.device, sizeof(ifr.ifr_name)); 1289 if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) { 1290 fprintf(stderr, 1291 "Can't restore interface flags (SIOCGIFFLAGS failed: %s).\n" 1292 "Please adjust manually.\n" 1293 "Hint: This can't happen with Linux >= 2.2.0.\n", 1294 strerror(errno)); 1295 } else { 1296 if (ifr.ifr_flags & IFF_PROMISC) { 1297 /* 1298 * Promiscuous mode is currently on; turn it 1299 * off. 1300 */ 1301 ifr.ifr_flags &= ~IFF_PROMISC; 1302 if (ioctl(handle->fd, SIOCSIFFLAGS, &ifr) == -1) { 1303 fprintf(stderr, 1304 "Can't restore interface flags (SIOCSIFFLAGS failed: %s).\n" 1305 "Please adjust manually.\n" 1306 "Hint: This can't happen with Linux >= 2.2.0.\n", 1307 strerror(errno)); 1308 } 1309 } 1310 } 1311 1312 /* 1313 * Take this pcap out of the list of pcaps for which we 1314 * have to take the interface out of promiscuous mode. 1315 */ 1316 for (p = pcaps_to_close, prevp = NULL; p != NULL; 1317 prevp = p, p = p->md.next) { 1318 if (p == handle) { 1319 /* 1320 * Found it. Remove it from the list. 1321 */ 1322 if (prevp == NULL) { 1323 /* 1324 * It was at the head of the list. 1325 */ 1326 pcaps_to_close = p->md.next; 1327 } else { 1328 /* 1329 * It was in the middle of the list. 1330 */ 1331 prevp->md.next = p->md.next; 1332 } 1333 break; 1334 } 1335 } 1336 } 1337 if (handle->md.device != NULL) 1338 free(handle->md.device); 1339} 1340 1341/* 1342 * Try to open a packet socket using the old kernel interface. 1343 * Returns 0 on failure. 1344 * FIXME: 0 uses to mean success (Sebastian) 1345 */ 1346static int 1347live_open_old(pcap_t *handle, char *device, int promisc, 1348 int to_ms, char *ebuf) 1349{ 1350 int sock_fd = -1, arptype; 1351 struct ifreq ifr; 1352 1353 do { 1354 /* Open the socket */ 1355 1356 sock_fd = socket(PF_INET, SOCK_PACKET, htons(ETH_P_ALL)); 1357 if (sock_fd == -1) { 1358 snprintf(ebuf, PCAP_ERRBUF_SIZE, 1359 "socket: %s", pcap_strerror(errno)); 1360 break; 1361 } 1362 1363 /* It worked - we are using the old interface */ 1364 handle->md.sock_packet = 1; 1365 1366 /* ...which means we get the link-layer header. */ 1367 handle->md.cooked = 0; 1368 1369 /* Bind to the given device */ 1370 1371 if (!device) { 1372 strncpy(ebuf, "pcap_open_live: The \"any\" device isn't supported on 2.0[.x]-kernel systems", 1373 PCAP_ERRBUF_SIZE); 1374 break; 1375 } 1376 if (iface_bind_old(sock_fd, device, ebuf) == -1) 1377 break; 1378 1379 /* Go to promisc mode */ 1380 if (promisc) { 1381 memset(&ifr, 0, sizeof(ifr)); 1382 strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 1383 if (ioctl(sock_fd, SIOCGIFFLAGS, &ifr) == -1) { 1384 snprintf(ebuf, PCAP_ERRBUF_SIZE, 1385 "ioctl: %s", pcap_strerror(errno)); 1386 break; 1387 } 1388 if ((ifr.ifr_flags & IFF_PROMISC) == 0) { 1389 /* 1390 * Promiscuous mode isn't currently on, 1391 * so turn it on, and remember that 1392 * we should turn it off when the 1393 * pcap_t is closed. 1394 */ 1395 1396 /* 1397 * If we haven't already done so, arrange 1398 * to have "pcap_close_all()" called when 1399 * we exit. 1400 */ 1401 if (!did_atexit) { 1402 if (atexit(pcap_close_all) == -1) { 1403 /* 1404 * "atexit()" failed; don't 1405 * put the interface in 1406 * promiscuous mode, just 1407 * give up. 1408 */ 1409 strncpy(ebuf, "atexit failed", 1410 PCAP_ERRBUF_SIZE); 1411 break; 1412 } 1413 } 1414 1415 ifr.ifr_flags |= IFF_PROMISC; 1416 if (ioctl(sock_fd, SIOCSIFFLAGS, &ifr) == -1) { 1417 snprintf(ebuf, PCAP_ERRBUF_SIZE, 1418 "ioctl: %s", 1419 pcap_strerror(errno)); 1420 break; 1421 } 1422 handle->md.clear_promisc = 1; 1423 1424 /* 1425 * Add this to the list of pcaps 1426 * to close when we exit. 1427 */ 1428 handle->md.next = pcaps_to_close; 1429 pcaps_to_close = handle; 1430 } 1431 } 1432 1433 /* All done - fill in the pcap handle */ 1434 1435 arptype = iface_get_arptype(sock_fd, device, ebuf); 1436 if (arptype == -1) 1437 break; 1438 1439 /* Save the socket FD in the pcap structure */ 1440 1441 handle->fd = sock_fd; 1442 1443 /* 1444 * Default value for offset to align link-layer payload 1445 * on a 4-byte boundary. 1446 */ 1447 handle->offset = 0; 1448 1449 /* 1450 * XXX - handle ISDN types here? We can't fall back on 1451 * cooked sockets, so we'd have to figure out from the 1452 * device name what type of link-layer encapsulation 1453 * it's using, and map that to an appropriate DLT_ 1454 * value, meaning we'd map "isdnN" devices to DLT_RAW 1455 * (they supply raw IP packets with no link-layer 1456 * header) and "isdY" devices to a new DLT_I4L_IP 1457 * type that has only an Ethernet packet type as 1458 * a link-layer header. 1459 */ 1460 map_arphrd_to_dlt(handle, arptype); 1461 if (handle->linktype == -1 || 1462 handle->linktype == DLT_LINUX_SLL) { 1463 snprintf(ebuf, PCAP_ERRBUF_SIZE, 1464 "interface type of %s not supported", device); 1465 break; 1466 } 1467 1468 return 1; 1469 1470 } while (0); 1471 1472 if (sock_fd != -1) 1473 close(sock_fd); 1474 return 0; 1475} 1476 1477/* 1478 * Bind the socket associated with FD to the given device using the 1479 * interface of the old kernels. 1480 */ 1481static int 1482iface_bind_old(int fd, const char *device, char *ebuf) 1483{ 1484 struct sockaddr saddr; 1485 1486 memset(&saddr, 0, sizeof(saddr)); 1487 strncpy(saddr.sa_data, device, sizeof(saddr.sa_data)); 1488 if (bind(fd, &saddr, sizeof(saddr)) == -1) { 1489 snprintf(ebuf, PCAP_ERRBUF_SIZE, 1490 "bind: %s", pcap_strerror(errno)); 1491 return -1; 1492 } 1493 1494 return 0; 1495} 1496 1497 1498/* ===== System calls available on all supported kernels ============== */ 1499 1500/* 1501 * Query the kernel for the MTU of the given interface. 1502 */ 1503static int 1504iface_get_mtu(int fd, const char *device, char *ebuf) 1505{ 1506 struct ifreq ifr; 1507 1508 if (!device) 1509 return BIGGER_THAN_ALL_MTUS; 1510 1511 memset(&ifr, 0, sizeof(ifr)); 1512 strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 1513 1514 if (ioctl(fd, SIOCGIFMTU, &ifr) == -1) { 1515 snprintf(ebuf, PCAP_ERRBUF_SIZE, 1516 "ioctl: %s", pcap_strerror(errno)); 1517 return -1; 1518 } 1519 1520 return ifr.ifr_mtu; 1521} 1522 1523/* 1524 * Get the hardware type of the given interface as ARPHRD_xxx constant. 1525 */ 1526static int 1527iface_get_arptype(int fd, const char *device, char *ebuf) 1528{ 1529 struct ifreq ifr; 1530 1531 memset(&ifr, 0, sizeof(ifr)); 1532 strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name)); 1533 1534 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) { 1535 snprintf(ebuf, PCAP_ERRBUF_SIZE, 1536 "ioctl: %s", pcap_strerror(errno)); 1537 return -1; 1538 } 1539 1540 return ifr.ifr_hwaddr.sa_family; 1541} 1542 1543#ifdef SO_ATTACH_FILTER 1544static int 1545fix_program(pcap_t *handle, struct sock_fprog *fcode) 1546{ 1547 size_t prog_size; 1548 register int i; 1549 register struct bpf_insn *p; 1550 struct bpf_insn *f; 1551 int len; 1552 1553 /* 1554 * Make a copy of the filter, and modify that copy if 1555 * necessary. 1556 */ 1557 prog_size = sizeof(*handle->fcode.bf_insns) * handle->fcode.bf_len; 1558 len = handle->fcode.bf_len; 1559 f = (struct bpf_insn *)malloc(prog_size); 1560 if (f == NULL) { 1561 snprintf(handle->errbuf, sizeof(handle->errbuf), 1562 "malloc: %s", pcap_strerror(errno)); 1563 return -1; 1564 } 1565 memcpy(f, handle->fcode.bf_insns, prog_size); 1566 fcode->len = len; 1567 fcode->filter = (struct sock_filter *) f; 1568 1569 for (i = 0; i < len; ++i) { 1570 p = &f[i]; 1571 /* 1572 * What type of instruction is this? 1573 */ 1574 switch (BPF_CLASS(p->code)) { 1575 1576 case BPF_RET: 1577 /* 1578 * It's a return instruction; is the snapshot 1579 * length a constant, rather than the contents 1580 * of the accumulator? 1581 */ 1582 if (BPF_MODE(p->code) == BPF_K) { 1583 /* 1584 * Yes - if the value to be returned, 1585 * i.e. the snapshot length, is anything 1586 * other than 0, make it 65535, so that 1587 * the packet is truncated by "recvfrom()", 1588 * not by the filter. 1589 * 1590 * XXX - there's nothing we can easily do 1591 * if it's getting the value from the 1592 * accumulator; we'd have to insert 1593 * code to force non-zero values to be 1594 * 65535. 1595 */ 1596 if (p->k != 0) 1597 p->k = 65535; 1598 } 1599 break; 1600 1601 case BPF_LD: 1602 case BPF_LDX: 1603 /* 1604 * It's a load instruction; is it loading 1605 * from the packet? 1606 */ 1607 switch (BPF_MODE(p->code)) { 1608 1609 case BPF_ABS: 1610 case BPF_IND: 1611 case BPF_MSH: 1612 /* 1613 * Yes; are we in cooked mode? 1614 */ 1615 if (handle->md.cooked) { 1616 /* 1617 * Yes, so we need to fix this 1618 * instruction. 1619 */ 1620 if (fix_offset(p) < 0) { 1621 /* 1622 * We failed to do so. 1623 * Return 0, so our caller 1624 * knows to punt to userland. 1625 */ 1626 return 0; 1627 } 1628 } 1629 break; 1630 } 1631 break; 1632 } 1633 } 1634 return 1; /* we succeeded */ 1635} 1636 1637static int 1638fix_offset(struct bpf_insn *p) 1639{ 1640 /* 1641 * What's the offset? 1642 */ 1643 if (p->k >= SLL_HDR_LEN) { 1644 /* 1645 * It's within the link-layer payload; that starts at an 1646 * offset of 0, as far as the kernel packet filter is 1647 * concerned, so subtract the length of the link-layer 1648 * header. 1649 */ 1650 p->k -= SLL_HDR_LEN; 1651 } else if (p->k == 14) { 1652 /* 1653 * It's the protocol field; map it to the special magic 1654 * kernel offset for that field. 1655 */ 1656 p->k = SKF_AD_OFF + SKF_AD_PROTOCOL; 1657 } else { 1658 /* 1659 * It's within the header, but it's not one of those 1660 * fields; we can't do that in the kernel, so punt 1661 * to userland. 1662 */ 1663 return -1; 1664 } 1665 return 0; 1666} 1667 1668static int 1669set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode) 1670{ 1671 int total_filter_on = 0; 1672 int save_mode; 1673 int ret; 1674 int save_errno; 1675 1676 /* 1677 * The socket filter code doesn't discard all packets queued 1678 * up on the socket when the filter is changed; this means 1679 * that packets that don't match the new filter may show up 1680 * after the new filter is put onto the socket, if those 1681 * packets haven't yet been read. 1682 * 1683 * This means, for example, that if you do a tcpdump capture 1684 * with a filter, the first few packets in the capture might 1685 * be packets that wouldn't have passed the filter. 1686 * 1687 * We therefore discard all packets queued up on the socket 1688 * when setting a kernel filter. (This isn't an issue for 1689 * userland filters, as the userland filtering is done after 1690 * packets are queued up.) 1691 * 1692 * To flush those packets, we put the socket in read-only mode, 1693 * and read packets from the socket until there are no more to 1694 * read. 1695 * 1696 * In order to keep that from being an infinite loop - i.e., 1697 * to keep more packets from arriving while we're draining 1698 * the queue - we put the "total filter", which is a filter 1699 * that rejects all packets, onto the socket before draining 1700 * the queue. 1701 * 1702 * This code deliberately ignores any errors, so that you may 1703 * get bogus packets if an error occurs, rather than having 1704 * the filtering done in userland even if it could have been 1705 * done in the kernel. 1706 */ 1707 if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER, 1708 &total_fcode, sizeof(total_fcode)) == 0) { 1709 char drain[1]; 1710 1711 /* 1712 * Note that we've put the total filter onto the socket. 1713 */ 1714 total_filter_on = 1; 1715 1716 /* 1717 * Save the socket's current mode, and put it in 1718 * non-blocking mode; we drain it by reading packets 1719 * until we get an error (which we assume is a 1720 * "nothing more to be read" error). 1721 */ 1722 save_mode = fcntl(handle->fd, F_GETFL, 0); 1723 if (save_mode != -1 && 1724 fcntl(handle->fd, F_SETFL, save_mode | O_NONBLOCK) >= 0) { 1725 while (recv(handle->fd, &drain, sizeof drain, 1726 MSG_TRUNC) >= 0) 1727 ; 1728 fcntl(handle->fd, F_SETFL, save_mode); 1729 } 1730 } 1731 1732 /* 1733 * Now attach the new filter. 1734 */ 1735 ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER, 1736 fcode, sizeof(*fcode)); 1737 if (ret == -1 && total_filter_on) { 1738 /* 1739 * Well, we couldn't set that filter on the socket, 1740 * but we could set the total filter on the socket. 1741 * 1742 * This could, for example, mean that the filter was 1743 * too big to put into the kernel, so we'll have to 1744 * filter in userland; in any case, we'll be doing 1745 * filtering in userland, so we need to remove the 1746 * total filter so we see packets. 1747 */ 1748 save_errno = errno; 1749 1750 /* 1751 * XXX - if this fails, we're really screwed; 1752 * we have the total filter on the socket, 1753 * and it won't come off. What do we do then? 1754 */ 1755 reset_kernel_filter(handle); 1756 1757 errno = save_errno; 1758 } 1759 return ret; 1760} 1761 1762static int 1763reset_kernel_filter(pcap_t *handle) 1764{ 1765 /* setsockopt() barfs unless it get a dummy parameter */ 1766 int dummy; 1767 1768 return setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER, 1769 &dummy, sizeof(dummy)); 1770} 1771#endif 1772