pcap-linux.c revision 146768
1168404Spjd/*
2168404Spjd *  pcap-linux.c: Packet capture interface to the Linux kernel
3168404Spjd *
4168404Spjd *  Copyright (c) 2000 Torsten Landschoff <torsten@debian.org>
5168404Spjd *  		       Sebastian Krahmer  <krahmer@cs.uni-potsdam.de>
6168404Spjd *
7168404Spjd *  License: BSD
8168404Spjd *
9168404Spjd *  Redistribution and use in source and binary forms, with or without
10168404Spjd *  modification, are permitted provided that the following conditions
11168404Spjd *  are met:
12168404Spjd *
13168404Spjd *  1. Redistributions of source code must retain the above copyright
14168404Spjd *     notice, this list of conditions and the following disclaimer.
15168404Spjd *  2. Redistributions in binary form must reproduce the above copyright
16168404Spjd *     notice, this list of conditions and the following disclaimer in
17168404Spjd *     the documentation and/or other materials provided with the
18168404Spjd *     distribution.
19168404Spjd *  3. The names of the authors may not be used to endorse or promote
20168404Spjd *     products derived from this software without specific prior
21168404Spjd *     written permission.
22168404Spjd *
23168404Spjd *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24168404Spjd *  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25168404Spjd *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26168404Spjd */
27168404Spjd
28168404Spjd#ifndef lint
29168404Spjdstatic const char rcsid[] _U_ =
30168404Spjd    "@(#) $Header: /tcpdump/master/libpcap/pcap-linux.c,v 1.110 2004/10/19 07:06:12 guy Exp $ (LBL)";
31168404Spjd#endif
32168404Spjd
33168404Spjd/*
34168404Spjd * Known problems with 2.0[.x] kernels:
35168404Spjd *
36168404Spjd *   - The loopback device gives every packet twice; on 2.2[.x] kernels,
37168404Spjd *     if we use PF_PACKET, we can filter out the transmitted version
38168404Spjd *     of the packet by using data in the "sockaddr_ll" returned by
39168404Spjd *     "recvfrom()", but, on 2.0[.x] kernels, we have to use
40168404Spjd *     PF_INET/SOCK_PACKET, which means "recvfrom()" supplies a
41168404Spjd *     "sockaddr_pkt" which doesn't give us enough information to let
42168404Spjd *     us do that.
43168404Spjd *
44168404Spjd *   - We have to set the interface's IFF_PROMISC flag ourselves, if
45168404Spjd *     we're to run in promiscuous mode, which means we have to turn
46168404Spjd *     it off ourselves when we're done; the kernel doesn't keep track
47168404Spjd *     of how many sockets are listening promiscuously, which means
48168404Spjd *     it won't get turned off automatically when no sockets are
49168404Spjd *     listening promiscuously.  We catch "pcap_close()" and, for
50168404Spjd *     interfaces we put into promiscuous mode, take them out of
51168404Spjd *     promiscuous mode - which isn't necessarily the right thing to
52168404Spjd *     do, if another socket also requested promiscuous mode between
53168404Spjd *     the time when we opened the socket and the time when we close
54168404Spjd *     the socket.
55168404Spjd *
56168404Spjd *   - MSG_TRUNC isn't supported, so you can't specify that "recvfrom()"
57168404Spjd *     return the amount of data that you could have read, rather than
58168404Spjd *     the amount that was returned, so we can't just allocate a buffer
59168404Spjd *     whose size is the snapshot length and pass the snapshot length
60168404Spjd *     as the byte count, and also pass MSG_TRUNC, so that the return
61168404Spjd *     value tells us how long the packet was on the wire.
62168404Spjd *
63168404Spjd *     This means that, if we want to get the actual size of the packet,
64168404Spjd *     so we can return it in the "len" field of the packet header,
65168404Spjd *     we have to read the entire packet, not just the part that fits
66168404Spjd *     within the snapshot length, and thus waste CPU time copying data
67168404Spjd *     from the kernel that our caller won't see.
68168404Spjd *
69168404Spjd *     We have to get the actual size, and supply it in "len", because
70168404Spjd *     otherwise, the IP dissector in tcpdump, for example, will complain
71168404Spjd *     about "truncated-ip", as the packet will appear to have been
72168404Spjd *     shorter, on the wire, than the IP header said it should have been.
73168404Spjd */
74168404Spjd
75168404Spjd
76168404Spjd#ifdef HAVE_CONFIG_H
77168404Spjd#include "config.h"
78168404Spjd#endif
79168404Spjd
80168404Spjd#include "pcap-int.h"
81168404Spjd#include "sll.h"
82168404Spjd
83168404Spjd#ifdef HAVE_DAG_API
84168404Spjd#include "pcap-dag.h"
85168404Spjd#endif /* HAVE_DAG_API */
86168404Spjd
87168404Spjd#include <errno.h>
88168404Spjd#include <stdlib.h>
89168404Spjd#include <unistd.h>
90168404Spjd#include <fcntl.h>
91168404Spjd#include <string.h>
92168404Spjd#include <sys/socket.h>
93168404Spjd#include <sys/ioctl.h>
94168404Spjd#include <sys/utsname.h>
95168404Spjd#include <net/if.h>
96168404Spjd#include <netinet/in.h>
97168404Spjd#include <linux/if_ether.h>
98168404Spjd#include <net/if_arp.h>
99168404Spjd
100168404Spjd/*
101168404Spjd * If PF_PACKET is defined, we can use {SOCK_RAW,SOCK_DGRAM}/PF_PACKET
102168404Spjd * sockets rather than SOCK_PACKET sockets.
103168404Spjd *
104168404Spjd * To use them, we include <linux/if_packet.h> rather than
105168404Spjd * <netpacket/packet.h>; we do so because
106168404Spjd *
107168404Spjd *	some Linux distributions (e.g., Slackware 4.0) have 2.2 or
108168404Spjd *	later kernels and libc5, and don't provide a <netpacket/packet.h>
109168404Spjd *	file;
110168404Spjd *
111168404Spjd *	not all versions of glibc2 have a <netpacket/packet.h> file
112168404Spjd *	that defines stuff needed for some of the 2.4-or-later-kernel
113168404Spjd *	features, so if the system has a 2.4 or later kernel, we
114168404Spjd *	still can't use those features.
115168404Spjd *
116168404Spjd * We're already including a number of other <linux/XXX.h> headers, and
117168404Spjd * this code is Linux-specific (no other OS has PF_PACKET sockets as
118168404Spjd * a raw packet capture mechanism), so it's not as if you gain any
119168404Spjd * useful portability by using <netpacket/packet.h>
120168404Spjd *
121168404Spjd * XXX - should we just include <linux/if_packet.h> even if PF_PACKET
122168404Spjd * isn't defined?  It only defines one data structure in 2.0.x, so
123168404Spjd * it shouldn't cause any problems.
124168404Spjd */
125168404Spjd#ifdef PF_PACKET
126168404Spjd# include <linux/if_packet.h>
127168404Spjd
128168404Spjd /*
129168404Spjd  * On at least some Linux distributions (for example, Red Hat 5.2),
130168404Spjd  * there's no <netpacket/packet.h> file, but PF_PACKET is defined if
131168404Spjd  * you include <sys/socket.h>, but <linux/if_packet.h> doesn't define
132168404Spjd  * any of the PF_PACKET stuff such as "struct sockaddr_ll" or any of
133168404Spjd  * the PACKET_xxx stuff.
134168404Spjd  *
135168404Spjd  * So we check whether PACKET_HOST is defined, and assume that we have
136168404Spjd  * PF_PACKET sockets only if it is defined.
137168404Spjd  */
138168404Spjd# ifdef PACKET_HOST
139168404Spjd#  define HAVE_PF_PACKET_SOCKETS
140168404Spjd# endif /* PACKET_HOST */
141168404Spjd#endif /* PF_PACKET */
142168404Spjd
143168404Spjd#ifdef SO_ATTACH_FILTER
144168404Spjd#include <linux/types.h>
145168404Spjd#include <linux/filter.h>
146168404Spjd#endif
147168404Spjd
148168404Spjd#ifndef __GLIBC__
149168404Spjdtypedef int		socklen_t;
150168404Spjd#endif
151168404Spjd
152168404Spjd#ifndef MSG_TRUNC
153168404Spjd/*
154168404Spjd * This is being compiled on a system that lacks MSG_TRUNC; define it
155168404Spjd * with the value it has in the 2.2 and later kernels, so that, on
156168404Spjd * those kernels, when we pass it in the flags argument to "recvfrom()"
157168404Spjd * we're passing the right value and thus get the MSG_TRUNC behavior
158168404Spjd * we want.  (We don't get that behavior on 2.0[.x] kernels, because
159168404Spjd * they didn't support MSG_TRUNC.)
160168404Spjd */
161168404Spjd#define MSG_TRUNC	0x20
162168404Spjd#endif
163168404Spjd
164168404Spjd#ifndef SOL_PACKET
165168404Spjd/*
166168404Spjd * This is being compiled on a system that lacks SOL_PACKET; define it
167168404Spjd * with the value it has in the 2.2 and later kernels, so that we can
168168404Spjd * set promiscuous mode in the good modern way rather than the old
169168404Spjd * 2.0-kernel crappy way.
170168404Spjd */
171168404Spjd#define SOL_PACKET	263
172168404Spjd#endif
173168404Spjd
174168404Spjd#define MAX_LINKHEADER_SIZE	256
175168404Spjd
176168404Spjd/*
177168404Spjd * When capturing on all interfaces we use this as the buffer size.
 * Should be bigger than all MTUs that occur in real life.
179168404Spjd * 64kB should be enough for now.
180168404Spjd */
181168404Spjd#define BIGGER_THAN_ALL_MTUS	(64*1024)
182168404Spjd
183168404Spjd/*
184168404Spjd * Prototypes for internal functions
185168404Spjd */
186168404Spjdstatic void map_arphrd_to_dlt(pcap_t *, int, int);
187168404Spjdstatic int live_open_old(pcap_t *, const char *, int, int, char *);
188168404Spjdstatic int live_open_new(pcap_t *, const char *, int, int, char *);
189168404Spjdstatic int pcap_read_linux(pcap_t *, int, pcap_handler, u_char *);
190168404Spjdstatic int pcap_read_packet(pcap_t *, pcap_handler, u_char *);
191168404Spjdstatic int pcap_inject_linux(pcap_t *, const void *, size_t);
192168404Spjdstatic int pcap_stats_linux(pcap_t *, struct pcap_stat *);
193168404Spjdstatic int pcap_setfilter_linux(pcap_t *, struct bpf_program *);
194168404Spjdstatic void pcap_close_linux(pcap_t *);
195168404Spjd
196168404Spjd/*
197168404Spjd * Wrap some ioctl calls
198168404Spjd */
199168404Spjd#ifdef HAVE_PF_PACKET_SOCKETS
200168404Spjdstatic int	iface_get_id(int fd, const char *device, char *ebuf);
201168404Spjd#endif
202168404Spjdstatic int	iface_get_mtu(int fd, const char *device, char *ebuf);
203168404Spjdstatic int 	iface_get_arptype(int fd, const char *device, char *ebuf);
204168404Spjd#ifdef HAVE_PF_PACKET_SOCKETS
205168404Spjdstatic int 	iface_bind(int fd, int ifindex, char *ebuf);
206168404Spjd#endif
207168404Spjdstatic int 	iface_bind_old(int fd, const char *device, char *ebuf);
208168404Spjd
209168404Spjd#ifdef SO_ATTACH_FILTER
210168404Spjdstatic int	fix_program(pcap_t *handle, struct sock_fprog *fcode);
211168404Spjdstatic int	fix_offset(struct bpf_insn *p);
212168404Spjdstatic int	set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode);
213168404Spjdstatic int	reset_kernel_filter(pcap_t *handle);
214168404Spjd
215168404Spjdstatic struct sock_filter	total_insn
216168404Spjd	= BPF_STMT(BPF_RET | BPF_K, 0);
217168404Spjdstatic struct sock_fprog	total_fcode
218168404Spjd	= { 1, &total_insn };
219168404Spjd#endif
220168404Spjd
221168404Spjd/*
222168404Spjd *  Get a handle for a live capture from the given device. You can
223168404Spjd *  pass NULL as device to get all packages (without link level
224168404Spjd *  information of course). If you pass 1 as promisc the interface
225168404Spjd *  will be set to promiscous mode (XXX: I think this usage should
226168404Spjd *  be deprecated and functions be added to select that later allow
227168404Spjd *  modification of that values -- Torsten).
228168404Spjd *
229168404Spjd *  See also pcap(3).
230168404Spjd */
231168404Spjdpcap_t *
232168404Spjdpcap_open_live(const char *device, int snaplen, int promisc, int to_ms,
233168404Spjd    char *ebuf)
234168404Spjd{
235168404Spjd	pcap_t		*handle;
236168404Spjd	int		mtu;
237168404Spjd	int		err;
238168404Spjd	int		live_open_ok = 0;
239168404Spjd	struct utsname	utsname;
240168404Spjd
241168404Spjd#ifdef HAVE_DAG_API
242168404Spjd	if (strstr(device, "dag")) {
243168404Spjd		return dag_open_live(device, snaplen, promisc, to_ms, ebuf);
244168404Spjd	}
245168404Spjd#endif /* HAVE_DAG_API */
246168404Spjd
247168404Spjd        /* Allocate a handle for this session. */
248168404Spjd
249168404Spjd	handle = malloc(sizeof(*handle));
250168404Spjd	if (handle == NULL) {
251168404Spjd		snprintf(ebuf, PCAP_ERRBUF_SIZE, "malloc: %s",
252168404Spjd			 pcap_strerror(errno));
253168404Spjd		return NULL;
254168404Spjd	}
255168404Spjd
256168404Spjd	/* Initialize some components of the pcap structure. */
257168404Spjd
258168404Spjd	memset(handle, 0, sizeof(*handle));
259168404Spjd	handle->snapshot	= snaplen;
260168404Spjd	handle->md.timeout	= to_ms;
261168404Spjd
262168404Spjd	/*
263168404Spjd	 * NULL and "any" are special devices which give us the hint to
264168404Spjd	 * monitor all devices.
265168404Spjd	 */
266168404Spjd	if (!device || strcmp(device, "any") == 0) {
267168404Spjd		device			= NULL;
268168404Spjd		handle->md.device	= strdup("any");
269168404Spjd		if (promisc) {
270168404Spjd			promisc = 0;
271168404Spjd			/* Just a warning. */
272168404Spjd			snprintf(ebuf, PCAP_ERRBUF_SIZE,
273168404Spjd			    "Promiscuous mode not supported on the \"any\" device");
274168404Spjd		}
275168404Spjd
276168404Spjd	} else
277168404Spjd		handle->md.device	= strdup(device);
278168404Spjd
279168404Spjd	if (handle->md.device == NULL) {
280168404Spjd		snprintf(ebuf, PCAP_ERRBUF_SIZE, "strdup: %s",
281168404Spjd			 pcap_strerror(errno) );
282168404Spjd		free(handle);
283168404Spjd		return NULL;
284168404Spjd	}
285168404Spjd
286168404Spjd	/*
287168404Spjd	 * Current Linux kernels use the protocol family PF_PACKET to
288168404Spjd	 * allow direct access to all packets on the network while
289168404Spjd	 * older kernels had a special socket type SOCK_PACKET to
290168404Spjd	 * implement this feature.
291168404Spjd	 * While this old implementation is kind of obsolete we need
292168404Spjd	 * to be compatible with older kernels for a while so we are
293168404Spjd	 * trying both methods with the newer method preferred.
294168404Spjd	 */
295168404Spjd
296168404Spjd	if ((err = live_open_new(handle, device, promisc, to_ms, ebuf)) == 1)
297168404Spjd		live_open_ok = 1;
298168404Spjd	else if (err == 0) {
299168404Spjd		/* Non-fatal error; try old way */
300168404Spjd		if (live_open_old(handle, device, promisc, to_ms, ebuf))
301168404Spjd			live_open_ok = 1;
302168404Spjd	}
303168404Spjd	if (!live_open_ok) {
304168404Spjd		/*
305168404Spjd		 * Both methods to open the packet socket failed. Tidy
306168404Spjd		 * up and report our failure (ebuf is expected to be
307168404Spjd		 * set by the functions above).
308168404Spjd		 */
309168404Spjd
310168404Spjd		if (handle->md.device != NULL)
311168404Spjd			free(handle->md.device);
312168404Spjd		free(handle);
313168404Spjd		return NULL;
314168404Spjd	}
315168404Spjd
316168404Spjd	/*
317168404Spjd	 * Compute the buffer size.
318168404Spjd	 *
319168404Spjd	 * If we're using SOCK_PACKET, this might be a 2.0[.x] kernel,
320168404Spjd	 * and might require special handling - check.
321168404Spjd	 */
322168404Spjd	if (handle->md.sock_packet && (uname(&utsname) < 0 ||
323168404Spjd	    strncmp(utsname.release, "2.0", 3) == 0)) {
324168404Spjd		/*
325168404Spjd		 * We're using a SOCK_PACKET structure, and either
326168404Spjd		 * we couldn't find out what kernel release this is,
327168404Spjd		 * or it's a 2.0[.x] kernel.
328168404Spjd		 *
329168404Spjd		 * In the 2.0[.x] kernel, a "recvfrom()" on
330168404Spjd		 * a SOCK_PACKET socket, with MSG_TRUNC set, will
331168404Spjd		 * return the number of bytes read, so if we pass
332		 * a length based on the snapshot length, it'll
333		 * return the number of bytes from the packet
334		 * copied to userland, not the actual length
335		 * of the packet.
336		 *
337		 * This means that, for example, the IP dissector
338		 * in tcpdump will get handed a packet length less
339		 * than the length in the IP header, and will
340		 * complain about "truncated-ip".
341		 *
342		 * So we don't bother trying to copy from the
343		 * kernel only the bytes in which we're interested,
344		 * but instead copy them all, just as the older
345		 * versions of libpcap for Linux did.
346		 *
347		 * The buffer therefore needs to be big enough to
348		 * hold the largest packet we can get from this
349		 * device.  Unfortunately, we can't get the MRU
350		 * of the network; we can only get the MTU.  The
351		 * MTU may be too small, in which case a packet larger
352		 * than the buffer size will be truncated *and* we
353		 * won't get the actual packet size.
354		 *
355		 * However, if the snapshot length is larger than
356		 * the buffer size based on the MTU, we use the
357		 * snapshot length as the buffer size, instead;
358		 * this means that with a sufficiently large snapshot
359		 * length we won't artificially truncate packets
360		 * to the MTU-based size.
361		 *
362		 * This mess just one of many problems with packet
363		 * capture on 2.0[.x] kernels; you really want a
364		 * 2.2[.x] or later kernel if you want packet capture
365		 * to work well.
366		 */
367		mtu = iface_get_mtu(handle->fd, device, ebuf);
368		if (mtu == -1) {
369			pcap_close_linux(handle);
370			free(handle);
371			return NULL;
372		}
373		handle->bufsize = MAX_LINKHEADER_SIZE + mtu;
374		if (handle->bufsize < handle->snapshot)
375			handle->bufsize = handle->snapshot;
376	} else {
377		/*
378		 * This is a 2.2[.x] or later kernel (we know that
379		 * either because we're not using a SOCK_PACKET
380		 * socket - PF_PACKET is supported only in 2.2
381		 * and later kernels - or because we checked the
382		 * kernel version).
383		 *
384		 * We can safely pass "recvfrom()" a byte count
385		 * based on the snapshot length.
386		 */
387		handle->bufsize = handle->snapshot;
388	}
389
390	/* Allocate the buffer */
391
392	handle->buffer	 = malloc(handle->bufsize + handle->offset);
393	if (!handle->buffer) {
394	        snprintf(ebuf, PCAP_ERRBUF_SIZE,
395			 "malloc: %s", pcap_strerror(errno));
396		pcap_close_linux(handle);
397		free(handle);
398		return NULL;
399	}
400
401	/*
402	 * "handle->fd" is a socket, so "select()" and "poll()"
403	 * should work on it.
404	 */
405	handle->selectable_fd = handle->fd;
406
407	handle->read_op = pcap_read_linux;
408	handle->inject_op = pcap_inject_linux;
409	handle->setfilter_op = pcap_setfilter_linux;
410	handle->set_datalink_op = NULL;	/* can't change data link type */
411	handle->getnonblock_op = pcap_getnonblock_fd;
412	handle->setnonblock_op = pcap_setnonblock_fd;
413	handle->stats_op = pcap_stats_linux;
414	handle->close_op = pcap_close_linux;
415
416	return handle;
417}
418
419/*
420 *  Read at most max_packets from the capture stream and call the callback
421 *  for each of them. Returns the number of packets handled or -1 if an
422 *  error occured.
423 */
424static int
425pcap_read_linux(pcap_t *handle, int max_packets, pcap_handler callback, u_char *user)
426{
427	/*
428	 * Currently, on Linux only one packet is delivered per read,
429	 * so we don't loop.
430	 */
431	return pcap_read_packet(handle, callback, user);
432}
433
434/*
435 *  Read a packet from the socket calling the handler provided by
436 *  the user. Returns the number of packets received or -1 if an
437 *  error occured.
438 */
439static int
440pcap_read_packet(pcap_t *handle, pcap_handler callback, u_char *userdata)
441{
442	u_char			*bp;
443	int			offset;
444#ifdef HAVE_PF_PACKET_SOCKETS
445	struct sockaddr_ll	from;
446	struct sll_header	*hdrp;
447#else
448	struct sockaddr		from;
449#endif
450	socklen_t		fromlen;
451	int			packet_len, caplen;
452	struct pcap_pkthdr	pcap_header;
453
454#ifdef HAVE_PF_PACKET_SOCKETS
455	/*
456	 * If this is a cooked device, leave extra room for a
457	 * fake packet header.
458	 */
459	if (handle->md.cooked)
460		offset = SLL_HDR_LEN;
461	else
462		offset = 0;
463#else
464	/*
465	 * This system doesn't have PF_PACKET sockets, so it doesn't
466	 * support cooked devices.
467	 */
468	offset = 0;
469#endif
470
471	/* Receive a single packet from the kernel */
472
473	bp = handle->buffer + handle->offset;
474	do {
475		/*
476		 * Has "pcap_breakloop()" been called?
477		 */
478		if (handle->break_loop) {
479			/*
480			 * Yes - clear the flag that indicates that it
481			 * has, and return -2 as an indication that we
482			 * were told to break out of the loop.
483			 */
484			handle->break_loop = 0;
485			return -2;
486		}
487		fromlen = sizeof(from);
488		packet_len = recvfrom(
489			handle->fd, bp + offset,
490			handle->bufsize - offset, MSG_TRUNC,
491			(struct sockaddr *) &from, &fromlen);
492	} while (packet_len == -1 && errno == EINTR);
493
494	/* Check if an error occured */
495
496	if (packet_len == -1) {
497		if (errno == EAGAIN)
498			return 0;	/* no packet there */
499		else {
500			snprintf(handle->errbuf, sizeof(handle->errbuf),
501				 "recvfrom: %s", pcap_strerror(errno));
502			return -1;
503		}
504	}
505
506#ifdef HAVE_PF_PACKET_SOCKETS
507	/*
508	 * If this is from the loopback device, reject outgoing packets;
509	 * we'll see the packet as an incoming packet as well, and
510	 * we don't want to see it twice.
511	 *
512	 * We can only do this if we're using PF_PACKET; the address
513	 * returned for SOCK_PACKET is a "sockaddr_pkt" which lacks
514	 * the relevant packet type information.
515	 */
516	if (!handle->md.sock_packet &&
517	    from.sll_ifindex == handle->md.lo_ifindex &&
518	    from.sll_pkttype == PACKET_OUTGOING)
519		return 0;
520#endif
521
522#ifdef HAVE_PF_PACKET_SOCKETS
523	/*
524	 * If this is a cooked device, fill in the fake packet header.
525	 */
526	if (handle->md.cooked) {
527		/*
528		 * Add the length of the fake header to the length
529		 * of packet data we read.
530		 */
531		packet_len += SLL_HDR_LEN;
532
533		hdrp = (struct sll_header *)bp;
534
535		/*
536		 * Map the PACKET_ value to a LINUX_SLL_ value; we
537		 * want the same numerical value to be used in
538		 * the link-layer header even if the numerical values
539		 * for the PACKET_ #defines change, so that programs
540		 * that look at the packet type field will always be
541		 * able to handle DLT_LINUX_SLL captures.
542		 */
543		switch (from.sll_pkttype) {
544
545		case PACKET_HOST:
546			hdrp->sll_pkttype = htons(LINUX_SLL_HOST);
547			break;
548
549		case PACKET_BROADCAST:
550			hdrp->sll_pkttype = htons(LINUX_SLL_BROADCAST);
551			break;
552
553		case PACKET_MULTICAST:
554			hdrp->sll_pkttype = htons(LINUX_SLL_MULTICAST);
555			break;
556
557		case PACKET_OTHERHOST:
558			hdrp->sll_pkttype = htons(LINUX_SLL_OTHERHOST);
559			break;
560
561		case PACKET_OUTGOING:
562			hdrp->sll_pkttype = htons(LINUX_SLL_OUTGOING);
563			break;
564
565		default:
566			hdrp->sll_pkttype = -1;
567			break;
568		}
569
570		hdrp->sll_hatype = htons(from.sll_hatype);
571		hdrp->sll_halen = htons(from.sll_halen);
572		memcpy(hdrp->sll_addr, from.sll_addr,
573		    (from.sll_halen > SLL_ADDRLEN) ?
574		      SLL_ADDRLEN :
575		      from.sll_halen);
576		hdrp->sll_protocol = from.sll_protocol;
577	}
578#endif
579
580	/*
581	 * XXX: According to the kernel source we should get the real
582	 * packet len if calling recvfrom with MSG_TRUNC set. It does
583	 * not seem to work here :(, but it is supported by this code
584	 * anyway.
585	 * To be honest the code RELIES on that feature so this is really
586	 * broken with 2.2.x kernels.
587	 * I spend a day to figure out what's going on and I found out
588	 * that the following is happening:
589	 *
590	 * The packet comes from a random interface and the packet_rcv
591	 * hook is called with a clone of the packet. That code inserts
592	 * the packet into the receive queue of the packet socket.
593	 * If a filter is attached to that socket that filter is run
594	 * first - and there lies the problem. The default filter always
595	 * cuts the packet at the snaplen:
596	 *
597	 * # tcpdump -d
598	 * (000) ret      #68
599	 *
600	 * So the packet filter cuts down the packet. The recvfrom call
601	 * says "hey, it's only 68 bytes, it fits into the buffer" with
602	 * the result that we don't get the real packet length. This
603	 * is valid at least until kernel 2.2.17pre6.
604	 *
605	 * We currently handle this by making a copy of the filter
606	 * program, fixing all "ret" instructions with non-zero
607	 * operands to have an operand of 65535 so that the filter
608	 * doesn't truncate the packet, and supplying that modified
609	 * filter to the kernel.
610	 */
611
612	caplen = packet_len;
613	if (caplen > handle->snapshot)
614		caplen = handle->snapshot;
615
616	/* Run the packet filter if not using kernel filter */
617	if (!handle->md.use_bpf && handle->fcode.bf_insns) {
618		if (bpf_filter(handle->fcode.bf_insns, bp,
619		                packet_len, caplen) == 0)
620		{
621			/* rejected by filter */
622			return 0;
623		}
624	}
625
626	/* Fill in our own header data */
627
628	if (ioctl(handle->fd, SIOCGSTAMP, &pcap_header.ts) == -1) {
629		snprintf(handle->errbuf, sizeof(handle->errbuf),
630			 "ioctl: %s", pcap_strerror(errno));
631		return -1;
632	}
633	pcap_header.caplen	= caplen;
634	pcap_header.len		= packet_len;
635
636	/*
637	 * Count the packet.
638	 *
639	 * Arguably, we should count them before we check the filter,
640	 * as on many other platforms "ps_recv" counts packets
641	 * handed to the filter rather than packets that passed
642	 * the filter, but if filtering is done in the kernel, we
643	 * can't get a count of packets that passed the filter,
644	 * and that would mean the meaning of "ps_recv" wouldn't
645	 * be the same on all Linux systems.
646	 *
647	 * XXX - it's not the same on all systems in any case;
648	 * ideally, we should have a "get the statistics" call
649	 * that supplies more counts and indicates which of them
650	 * it supplies, so that we supply a count of packets
651	 * handed to the filter only on platforms where that
652	 * information is available.
653	 *
654	 * We count them here even if we can get the packet count
655	 * from the kernel, as we can only determine at run time
656	 * whether we'll be able to get it from the kernel (if
657	 * HAVE_TPACKET_STATS isn't defined, we can't get it from
658	 * the kernel, but if it is defined, the library might
659	 * have been built with a 2.4 or later kernel, but we
660	 * might be running on a 2.2[.x] kernel without Alexey
661	 * Kuznetzov's turbopacket patches, and thus the kernel
662	 * might not be able to supply those statistics).  We
663	 * could, I guess, try, when opening the socket, to get
664	 * the statistics, and if we can not increment the count
665	 * here, but it's not clear that always incrementing
666	 * the count is more expensive than always testing a flag
667	 * in memory.
668	 */
669	handle->md.stat.ps_recv++;
670
671	/* Call the user supplied callback function */
672	callback(userdata, &pcap_header, bp);
673
674	return 1;
675}
676
677static int
678pcap_inject_linux(pcap_t *handle, const void *buf, size_t size)
679{
680	int ret;
681
682#ifdef HAVE_PF_PACKET_SOCKETS
683	if (!handle->md.sock_packet) {
684		/* PF_PACKET socket */
685		if (handle->md.ifindex == -1) {
686			/*
687			 * We don't support sending on the "any" device.
688			 */
689			strlcpy(handle->errbuf,
690			    "Sending packets isn't supported on the \"any\" device",
691			    PCAP_ERRBUF_SIZE);
692			return (-1);
693		}
694
695		if (handle->md.cooked) {
696			/*
697			 * We don't support sending on the "any" device.
698			 *
699			 * XXX - how do you send on a bound cooked-mode
700			 * socket?
701			 * Is a "sendto()" required there?
702			 */
703			strlcpy(handle->errbuf,
704			    "Sending packets isn't supported in cooked mode",
705			    PCAP_ERRBUF_SIZE);
706			return (-1);
707		}
708	}
709#endif
710
711	ret = send(handle->fd, buf, size, 0);
712	if (ret == -1) {
713		snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "send: %s",
714		    pcap_strerror(errno));
715		return (-1);
716	}
717	return (ret);
718}
719
720/*
721 *  Get the statistics for the given packet capture handle.
722 *  Reports the number of dropped packets iff the kernel supports
723 *  the PACKET_STATISTICS "getsockopt()" argument (2.4 and later
724 *  kernels, and 2.2[.x] kernels with Alexey Kuznetzov's turbopacket
725 *  patches); otherwise, that information isn't available, and we lie
726 *  and report 0 as the count of dropped packets.
727 */
728static int
729pcap_stats_linux(pcap_t *handle, struct pcap_stat *stats)
730{
731#ifdef HAVE_TPACKET_STATS
732	struct tpacket_stats kstats;
733	socklen_t len = sizeof (struct tpacket_stats);
734#endif
735
736#ifdef HAVE_TPACKET_STATS
737	/*
738	 * Try to get the packet counts from the kernel.
739	 */
740	if (getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS,
741			&kstats, &len) > -1) {
742		/*
743		 * In "linux/net/packet/af_packet.c", at least in the
744		 * 2.4.9 kernel, "tp_packets" is incremented for every
745		 * packet that passes the packet filter *and* is
746		 * successfully queued on the socket; "tp_drops" is
747		 * incremented for every packet dropped because there's
748		 * not enough free space in the socket buffer.
749		 *
750		 * When the statistics are returned for a PACKET_STATISTICS
751		 * "getsockopt()" call, "tp_drops" is added to "tp_packets",
752		 * so that "tp_packets" counts all packets handed to
753		 * the PF_PACKET socket, including packets dropped because
754		 * there wasn't room on the socket buffer - but not
755		 * including packets that didn't pass the filter.
756		 *
757		 * In the BSD BPF, the count of received packets is
758		 * incremented for every packet handed to BPF, regardless
759		 * of whether it passed the filter.
760		 *
761		 * We can't make "pcap_stats()" work the same on both
762		 * platforms, but the best approximation is to return
763		 * "tp_packets" as the count of packets and "tp_drops"
764		 * as the count of drops.
765		 *
766		 * Keep a running total because each call to
767		 *    getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, ....
768		 * resets the counters to zero.
769		 */
770		handle->md.stat.ps_recv += kstats.tp_packets;
771		handle->md.stat.ps_drop += kstats.tp_drops;
772	}
773	else
774	{
775		/*
776		 * If the error was EOPNOTSUPP, fall through, so that
777		 * if you build the library on a system with
778		 * "struct tpacket_stats" and run it on a system
779		 * that doesn't, it works as it does if the library
780		 * is built on a system without "struct tpacket_stats".
781		 */
782		if (errno != EOPNOTSUPP) {
783			snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
784			    "pcap_stats: %s", pcap_strerror(errno));
785			return -1;
786		}
787	}
788#endif
789	/*
790	 * On systems where the PACKET_STATISTICS "getsockopt()" argument
791	 * is supported on PF_PACKET sockets:
792	 *
793	 *	"ps_recv" counts only packets that *passed* the filter,
794	 *	not packets that didn't pass the filter.  This includes
795	 *	packets later dropped because we ran out of buffer space.
796	 *
797	 *	"ps_drop" counts packets dropped because we ran out of
798	 *	buffer space.  It doesn't count packets dropped by the
799	 *	interface driver.  It counts only packets that passed
800	 *	the filter.
801	 *
802	 *	Both statistics include packets not yet read from the
803	 *	kernel by libpcap, and thus not yet seen by the application.
804	 *
805	 * On systems where the PACKET_STATISTICS "getsockopt()" argument
806	 * is not supported on PF_PACKET sockets:
807	 *
808	 *	"ps_recv" counts only packets that *passed* the filter,
809	 *	not packets that didn't pass the filter.  It does not
810	 *	count packets dropped because we ran out of buffer
811	 *	space.
812	 *
813	 *	"ps_drop" is not supported.
814	 *
815	 *	"ps_recv" doesn't include packets not yet read from
816	 *	the kernel by libpcap.
817	 */
818	*stats = handle->md.stat;
819	return 0;
820}
821
822/*
823 * Description string for the "any" device.
824 */
825static const char any_descr[] = "Pseudo-device that captures on all interfaces";
826
827int
828pcap_platform_finddevs(pcap_if_t **alldevsp, char *errbuf)
829{
830	if (pcap_add_if(alldevsp, "any", 0, any_descr, errbuf) < 0)
831		return (-1);
832
833#ifdef HAVE_DAG_API
834	if (dag_platform_finddevs(alldevsp, errbuf) < 0)
835		return (-1);
836#endif /* HAVE_DAG_API */
837
838	return (0);
839}
840
841/*
842 *  Attach the given BPF code to the packet capture device.
843 */
844static int
845pcap_setfilter_linux(pcap_t *handle, struct bpf_program *filter)
846{
847#ifdef SO_ATTACH_FILTER
848	struct sock_fprog	fcode;
849	int			can_filter_in_kernel;
850	int			err = 0;
851#endif
852
853	if (!handle)
854		return -1;
855	if (!filter) {
856	        strncpy(handle->errbuf, "setfilter: No filter specified",
857			sizeof(handle->errbuf));
858		return -1;
859	}
860
861	/* Make our private copy of the filter */
862
863	if (install_bpf_program(handle, filter) < 0)
864		/* install_bpf_program() filled in errbuf */
865		return -1;
866
867	/*
868	 * Run user level packet filter by default. Will be overriden if
869	 * installing a kernel filter succeeds.
870	 */
871	handle->md.use_bpf = 0;
872
873	/* Install kernel level filter if possible */
874
875#ifdef SO_ATTACH_FILTER
876#ifdef USHRT_MAX
877	if (handle->fcode.bf_len > USHRT_MAX) {
878		/*
879		 * fcode.len is an unsigned short for current kernel.
880		 * I have yet to see BPF-Code with that much
881		 * instructions but still it is possible. So for the
882		 * sake of correctness I added this check.
883		 */
884		fprintf(stderr, "Warning: Filter too complex for kernel\n");
885		fcode.filter = NULL;
886		can_filter_in_kernel = 0;
887	} else
888#endif /* USHRT_MAX */
889	{
890		/*
891		 * Oh joy, the Linux kernel uses struct sock_fprog instead
892		 * of struct bpf_program and of course the length field is
893		 * of different size. Pointed out by Sebastian
894		 *
895		 * Oh, and we also need to fix it up so that all "ret"
896		 * instructions with non-zero operands have 65535 as the
897		 * operand, and so that, if we're in cooked mode, all
898		 * memory-reference instructions use special magic offsets
899		 * in references to the link-layer header and assume that
900		 * the link-layer payload begins at 0; "fix_program()"
901		 * will do that.
902		 */
903		switch (fix_program(handle, &fcode)) {
904
905		case -1:
906		default:
907			/*
908			 * Fatal error; just quit.
909			 * (The "default" case shouldn't happen; we
910			 * return -1 for that reason.)
911			 */
912			return -1;
913
914		case 0:
915			/*
916			 * The program performed checks that we can't make
917			 * work in the kernel.
918			 */
919			can_filter_in_kernel = 0;
920			break;
921
922		case 1:
923			/*
924			 * We have a filter that'll work in the kernel.
925			 */
926			can_filter_in_kernel = 1;
927			break;
928		}
929	}
930
931	if (can_filter_in_kernel) {
932		if ((err = set_kernel_filter(handle, &fcode)) == 0)
933		{
934			/* Installation succeded - using kernel filter. */
935			handle->md.use_bpf = 1;
936		}
937		else if (err == -1)	/* Non-fatal error */
938		{
939			/*
940			 * Print a warning if we weren't able to install
941			 * the filter for a reason other than "this kernel
942			 * isn't configured to support socket filters.
943			 */
944			if (errno != ENOPROTOOPT && errno != EOPNOTSUPP) {
945				fprintf(stderr,
946				    "Warning: Kernel filter failed: %s\n",
947					pcap_strerror(errno));
948			}
949		}
950	}
951
952	/*
953	 * If we're not using the kernel filter, get rid of any kernel
954	 * filter that might've been there before, e.g. because the
955	 * previous filter could work in the kernel, or because some other
956	 * code attached a filter to the socket by some means other than
957	 * calling "pcap_setfilter()".  Otherwise, the kernel filter may
958	 * filter out packets that would pass the new userland filter.
959	 */
960	if (!handle->md.use_bpf)
961		reset_kernel_filter(handle);
962
963	/*
964	 * Free up the copy of the filter that was made by "fix_program()".
965	 */
966	if (fcode.filter != NULL)
967		free(fcode.filter);
968
969	if (err == -2)
970		/* Fatal error */
971		return -1;
972#endif /* SO_ATTACH_FILTER */
973
974	return 0;
975}
976
977/*
978 *  Linux uses the ARP hardware type to identify the type of an
979 *  interface. pcap uses the DLT_xxx constants for this. This
980 *  function takes a pointer to a "pcap_t", and an ARPHRD_xxx
981 *  constant, as arguments, and sets "handle->linktype" to the
982 *  appropriate DLT_XXX constant and sets "handle->offset" to
983 *  the appropriate value (to make "handle->offset" plus link-layer
984 *  header length be a multiple of 4, so that the link-layer payload
985 *  will be aligned on a 4-byte boundary when capturing packets).
986 *  (If the offset isn't set here, it'll be 0; add code as appropriate
987 *  for cases where it shouldn't be 0.)
988 *
989 *  If "cooked_ok" is non-zero, we can use DLT_LINUX_SLL and capture
990 *  in cooked mode; otherwise, we can't use cooked mode, so we have
991 *  to pick some type that works in raw mode, or fail.
992 *
993 *  Sets the link type to -1 if unable to map the type.
994 */
995static void map_arphrd_to_dlt(pcap_t *handle, int arptype, int cooked_ok)
996{
997	switch (arptype) {
998
999	case ARPHRD_ETHER:
1000		/*
1001		 * This is (presumably) a real Ethernet capture; give it a
1002		 * link-layer-type list with DLT_EN10MB and DLT_DOCSIS, so
1003		 * that an application can let you choose it, in case you're
1004		 * capturing DOCSIS traffic that a Cisco Cable Modem
1005		 * Termination System is putting out onto an Ethernet (it
1006		 * doesn't put an Ethernet header onto the wire, it puts raw
1007		 * DOCSIS frames out on the wire inside the low-level
1008		 * Ethernet framing).
1009		 *
1010		 * XXX - are there any sorts of "fake Ethernet" that have
1011		 * ARPHRD_ETHER but that *shouldn't offer DLT_DOCSIS as
1012		 * a Cisco CMTS won't put traffic onto it or get traffic
1013		 * bridged onto it?  ISDN is handled in "live_open_new()",
1014		 * as we fall back on cooked mode there; are there any
1015		 * others?
1016		 */
1017		handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
1018		/*
1019		 * If that fails, just leave the list empty.
1020		 */
1021		if (handle->dlt_list != NULL) {
1022			handle->dlt_list[0] = DLT_EN10MB;
1023			handle->dlt_list[1] = DLT_DOCSIS;
1024			handle->dlt_count = 2;
1025		}
1026		/* FALLTHROUGH */
1027
1028	case ARPHRD_METRICOM:
1029	case ARPHRD_LOOPBACK:
1030		handle->linktype = DLT_EN10MB;
1031		handle->offset = 2;
1032		break;
1033
1034	case ARPHRD_EETHER:
1035		handle->linktype = DLT_EN3MB;
1036		break;
1037
1038	case ARPHRD_AX25:
1039		handle->linktype = DLT_AX25;
1040		break;
1041
1042	case ARPHRD_PRONET:
1043		handle->linktype = DLT_PRONET;
1044		break;
1045
1046	case ARPHRD_CHAOS:
1047		handle->linktype = DLT_CHAOS;
1048		break;
1049
1050#ifndef ARPHRD_IEEE802_TR
1051#define ARPHRD_IEEE802_TR 800	/* From Linux 2.4 */
1052#endif
1053	case ARPHRD_IEEE802_TR:
1054	case ARPHRD_IEEE802:
1055		handle->linktype = DLT_IEEE802;
1056		handle->offset = 2;
1057		break;
1058
1059	case ARPHRD_ARCNET:
1060		handle->linktype = DLT_ARCNET_LINUX;
1061		break;
1062
1063#ifndef ARPHRD_FDDI	/* From Linux 2.2.13 */
1064#define ARPHRD_FDDI	774
1065#endif
1066	case ARPHRD_FDDI:
1067		handle->linktype = DLT_FDDI;
1068		handle->offset = 3;
1069		break;
1070
1071#ifndef ARPHRD_ATM  /* FIXME: How to #include this? */
1072#define ARPHRD_ATM 19
1073#endif
1074	case ARPHRD_ATM:
1075		/*
1076		 * The Classical IP implementation in ATM for Linux
1077		 * supports both what RFC 1483 calls "LLC Encapsulation",
1078		 * in which each packet has an LLC header, possibly
1079		 * with a SNAP header as well, prepended to it, and
1080		 * what RFC 1483 calls "VC Based Multiplexing", in which
1081		 * different virtual circuits carry different network
1082		 * layer protocols, and no header is prepended to packets.
1083		 *
1084		 * They both have an ARPHRD_ type of ARPHRD_ATM, so
1085		 * you can't use the ARPHRD_ type to find out whether
1086		 * captured packets will have an LLC header, and,
1087		 * while there's a socket ioctl to *set* the encapsulation
1088		 * type, there's no ioctl to *get* the encapsulation type.
1089		 *
1090		 * This means that
1091		 *
1092		 *	programs that dissect Linux Classical IP frames
1093		 *	would have to check for an LLC header and,
1094		 *	depending on whether they see one or not, dissect
1095		 *	the frame as LLC-encapsulated or as raw IP (I
1096		 *	don't know whether there's any traffic other than
1097		 *	IP that would show up on the socket, or whether
1098		 *	there's any support for IPv6 in the Linux
1099		 *	Classical IP code);
1100		 *
1101		 *	filter expressions would have to compile into
1102		 *	code that checks for an LLC header and does
1103		 *	the right thing.
1104		 *
1105		 * Both of those are a nuisance - and, at least on systems
1106		 * that support PF_PACKET sockets, we don't have to put
1107		 * up with those nuisances; instead, we can just capture
1108		 * in cooked mode.  That's what we'll do, if we can.
1109		 * Otherwise, we'll just fail.
1110		 */
1111		if (cooked_ok)
1112			handle->linktype = DLT_LINUX_SLL;
1113		else
1114			handle->linktype = -1;
1115		break;
1116
1117#ifndef ARPHRD_IEEE80211  /* From Linux 2.4.6 */
1118#define ARPHRD_IEEE80211 801
1119#endif
1120	case ARPHRD_IEEE80211:
1121		handle->linktype = DLT_IEEE802_11;
1122		break;
1123
1124#ifndef ARPHRD_IEEE80211_PRISM  /* From Linux 2.4.18 */
1125#define ARPHRD_IEEE80211_PRISM 802
1126#endif
1127	case ARPHRD_IEEE80211_PRISM:
1128		handle->linktype = DLT_PRISM_HEADER;
1129		break;
1130
1131	case ARPHRD_PPP:
1132		/*
1133		 * Some PPP code in the kernel supplies no link-layer
1134		 * header whatsoever to PF_PACKET sockets; other PPP
1135		 * code supplies PPP link-layer headers ("syncppp.c");
1136		 * some PPP code might supply random link-layer
1137		 * headers (PPP over ISDN - there's code in Ethereal,
1138		 * for example, to cope with PPP-over-ISDN captures
1139		 * with which the Ethereal developers have had to cope,
1140		 * heuristically trying to determine which of the
1141		 * oddball link-layer headers particular packets have).
1142		 *
1143		 * As such, we just punt, and run all PPP interfaces
1144		 * in cooked mode, if we can; otherwise, we just treat
1145		 * it as DLT_RAW, for now - if somebody needs to capture,
1146		 * on a 2.0[.x] kernel, on PPP devices that supply a
1147		 * link-layer header, they'll have to add code here to
1148		 * map to the appropriate DLT_ type (possibly adding a
1149		 * new DLT_ type, if necessary).
1150		 */
1151		if (cooked_ok)
1152			handle->linktype = DLT_LINUX_SLL;
1153		else {
1154			/*
1155			 * XXX - handle ISDN types here?  We can't fall
1156			 * back on cooked sockets, so we'd have to
1157			 * figure out from the device name what type of
1158			 * link-layer encapsulation it's using, and map
1159			 * that to an appropriate DLT_ value, meaning
1160			 * we'd map "isdnN" devices to DLT_RAW (they
1161			 * supply raw IP packets with no link-layer
1162			 * header) and "isdY" devices to a new DLT_I4L_IP
1163			 * type that has only an Ethernet packet type as
1164			 * a link-layer header.
1165			 *
1166			 * But sometimes we seem to get random crap
1167			 * in the link-layer header when capturing on
1168			 * ISDN devices....
1169			 */
1170			handle->linktype = DLT_RAW;
1171		}
1172		break;
1173
1174#ifndef ARPHRD_CISCO
1175#define ARPHRD_CISCO 513 /* previously ARPHRD_HDLC */
1176#endif
1177	case ARPHRD_CISCO:
1178		handle->linktype = DLT_C_HDLC;
1179		break;
1180
1181	/* Not sure if this is correct for all tunnels, but it
1182	 * works for CIPE */
1183	case ARPHRD_TUNNEL:
1184#ifndef ARPHRD_SIT
1185#define ARPHRD_SIT 776	/* From Linux 2.2.13 */
1186#endif
1187	case ARPHRD_SIT:
1188	case ARPHRD_CSLIP:
1189	case ARPHRD_SLIP6:
1190	case ARPHRD_CSLIP6:
1191	case ARPHRD_ADAPT:
1192	case ARPHRD_SLIP:
1193#ifndef ARPHRD_RAWHDLC
1194#define ARPHRD_RAWHDLC 518
1195#endif
1196	case ARPHRD_RAWHDLC:
1197#ifndef ARPHRD_DLCI
1198#define ARPHRD_DLCI 15
1199#endif
1200	case ARPHRD_DLCI:
1201		/*
1202		 * XXX - should some of those be mapped to DLT_LINUX_SLL
1203		 * instead?  Should we just map all of them to DLT_LINUX_SLL?
1204		 */
1205		handle->linktype = DLT_RAW;
1206		break;
1207
1208#ifndef ARPHRD_FRAD
1209#define ARPHRD_FRAD 770
1210#endif
1211	case ARPHRD_FRAD:
1212		handle->linktype = DLT_FRELAY;
1213		break;
1214
1215	case ARPHRD_LOCALTLK:
1216		handle->linktype = DLT_LTALK;
1217		break;
1218
1219#ifndef ARPHRD_FCPP
1220#define ARPHRD_FCPP	784
1221#endif
1222	case ARPHRD_FCPP:
1223#ifndef ARPHRD_FCAL
1224#define ARPHRD_FCAL	785
1225#endif
1226	case ARPHRD_FCAL:
1227#ifndef ARPHRD_FCPL
1228#define ARPHRD_FCPL	786
1229#endif
1230	case ARPHRD_FCPL:
1231#ifndef ARPHRD_FCFABRIC
1232#define ARPHRD_FCFABRIC	787
1233#endif
1234	case ARPHRD_FCFABRIC:
1235		/*
1236		 * We assume that those all mean RFC 2625 IP-over-
1237		 * Fibre Channel, with the RFC 2625 header at
1238		 * the beginning of the packet.
1239		 */
1240		handle->linktype = DLT_IP_OVER_FC;
1241		break;
1242
1243#ifndef ARPHRD_IRDA
1244#define ARPHRD_IRDA	783
1245#endif
1246	case ARPHRD_IRDA:
1247		/* Don't expect IP packet out of this interfaces... */
1248		handle->linktype = DLT_LINUX_IRDA;
1249		/* We need to save packet direction for IrDA decoding,
1250		 * so let's use "Linux-cooked" mode. Jean II */
1251		//handle->md.cooked = 1;
1252		break;
1253
1254	default:
1255		handle->linktype = -1;
1256		break;
1257	}
1258}
1259
1260/* ===== Functions to interface to the newer kernels ================== */
1261
1262/*
1263 *  Try to open a packet socket using the new kernel interface.
1264 *  Returns 0 on failure.
1265 *  FIXME: 0 uses to mean success (Sebastian)
1266 */
1267static int
1268live_open_new(pcap_t *handle, const char *device, int promisc,
1269	      int to_ms, char *ebuf)
1270{
1271#ifdef HAVE_PF_PACKET_SOCKETS
1272	int			sock_fd = -1, arptype;
1273	int			err;
1274	int			fatal_err = 0;
1275	struct packet_mreq	mr;
1276
1277	/* One shot loop used for error handling - bail out with break */
1278
1279	do {
1280		/*
1281		 * Open a socket with protocol family packet. If a device is
1282		 * given we try to open it in raw mode otherwise we use
1283		 * the cooked interface.
1284		 */
1285		sock_fd = device ?
1286			socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL))
1287		      : socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL));
1288
1289		if (sock_fd == -1) {
1290			snprintf(ebuf, PCAP_ERRBUF_SIZE, "socket: %s",
1291				 pcap_strerror(errno) );
1292			break;
1293		}
1294
1295		/* It seems the kernel supports the new interface. */
1296		handle->md.sock_packet = 0;
1297
1298		/*
1299		 * Get the interface index of the loopback device.
1300		 * If the attempt fails, don't fail, just set the
1301		 * "md.lo_ifindex" to -1.
1302		 *
1303		 * XXX - can there be more than one device that loops
1304		 * packets back, i.e. devices other than "lo"?  If so,
1305		 * we'd need to find them all, and have an array of
1306		 * indices for them, and check all of them in
1307		 * "pcap_read_packet()".
1308		 */
1309		handle->md.lo_ifindex = iface_get_id(sock_fd, "lo", ebuf);
1310
1311		/*
1312		 * Default value for offset to align link-layer payload
1313		 * on a 4-byte boundary.
1314		 */
1315		handle->offset	 = 0;
1316
1317		/*
1318		 * What kind of frames do we have to deal with? Fall back
1319		 * to cooked mode if we have an unknown interface type.
1320		 */
1321
1322		if (device) {
1323			/* Assume for now we don't need cooked mode. */
1324			handle->md.cooked = 0;
1325
1326			arptype	= iface_get_arptype(sock_fd, device, ebuf);
1327			if (arptype == -1) {
1328				fatal_err = 1;
1329				break;
1330			}
1331			map_arphrd_to_dlt(handle, arptype, 1);
1332			if (handle->linktype == -1 ||
1333			    handle->linktype == DLT_LINUX_SLL ||
1334			    handle->linktype == DLT_LINUX_IRDA ||
1335			    (handle->linktype == DLT_EN10MB &&
1336			     (strncmp("isdn", device, 4) == 0 ||
1337			      strncmp("isdY", device, 4) == 0))) {
1338				/*
1339				 * Unknown interface type (-1), or a
1340				 * device we explicitly chose to run
1341				 * in cooked mode (e.g., PPP devices),
1342				 * or an ISDN device (whose link-layer
1343				 * type we can only determine by using
1344				 * APIs that may be different on different
1345				 * kernels) - reopen in cooked mode.
1346				 */
1347				if (close(sock_fd) == -1) {
1348					snprintf(ebuf, PCAP_ERRBUF_SIZE,
1349						 "close: %s", pcap_strerror(errno));
1350					break;
1351				}
1352				sock_fd = socket(PF_PACKET, SOCK_DGRAM,
1353						 htons(ETH_P_ALL));
1354				if (sock_fd == -1) {
1355					snprintf(ebuf, PCAP_ERRBUF_SIZE,
1356						 "socket: %s", pcap_strerror(errno));
1357					break;
1358				}
1359				handle->md.cooked = 1;
1360
1361				/*
1362				 * Get rid of any link-layer type list
1363				 * we allocated - this only supports cooked
1364				 * capture.
1365				 */
1366				if (handle->dlt_list != NULL) {
1367					free(handle->dlt_list);
1368					handle->dlt_list = NULL;
1369					handle->dlt_count = 0;
1370				}
1371
1372				if (handle->linktype == -1) {
1373					/*
1374					 * Warn that we're falling back on
1375					 * cooked mode; we may want to
1376					 * update "map_arphrd_to_dlt()"
1377					 * to handle the new type.
1378					 */
1379					snprintf(ebuf, PCAP_ERRBUF_SIZE,
1380						"arptype %d not "
1381						"supported by libpcap - "
1382						"falling back to cooked "
1383						"socket",
1384						arptype);
1385				}
1386				/* IrDA capture is not a real "cooked" capture,
1387				 * it's IrLAP frames, not IP packets. */
1388				if (handle->linktype != DLT_LINUX_IRDA)
1389					handle->linktype = DLT_LINUX_SLL;
1390			}
1391
1392			handle->md.ifindex = iface_get_id(sock_fd, device, ebuf);
1393			if (handle->md.ifindex == -1)
1394				break;
1395
1396			if ((err = iface_bind(sock_fd, handle->md.ifindex,
1397			    ebuf)) < 0) {
1398				if (err == -2)
1399					fatal_err = 1;
1400				break;
1401			}
1402		} else {
1403			/*
1404			 * This is cooked mode.
1405			 */
1406			handle->md.cooked = 1;
1407			handle->linktype = DLT_LINUX_SLL;
1408
1409			/*
1410			 * We're not bound to a device.
1411			 * XXX - true?  Or true only if we're using
1412			 * the "any" device?
1413			 * For now, we're using this as an indication
1414			 * that we can't transmit; stop doing that only
1415			 * if we figure out how to transmit in cooked
1416			 * mode.
1417			 */
1418			handle->md.ifindex = -1;
1419		}
1420
1421		/*
1422		 * Select promiscuous mode on if "promisc" is set.
1423		 *
1424		 * Do not turn allmulti mode on if we don't select
1425		 * promiscuous mode - on some devices (e.g., Orinoco
1426		 * wireless interfaces), allmulti mode isn't supported
1427		 * and the driver implements it by turning promiscuous
1428		 * mode on, and that screws up the operation of the
1429		 * card as a normal networking interface, and on no
1430		 * other platform I know of does starting a non-
1431		 * promiscuous capture affect which multicast packets
1432		 * are received by the interface.
1433		 */
1434
1435		/*
1436		 * Hmm, how can we set promiscuous mode on all interfaces?
1437		 * I am not sure if that is possible at all.
1438		 */
1439
1440		if (device && promisc) {
1441			memset(&mr, 0, sizeof(mr));
1442			mr.mr_ifindex = handle->md.ifindex;
1443			mr.mr_type    = PACKET_MR_PROMISC;
1444			if (setsockopt(sock_fd, SOL_PACKET,
1445				PACKET_ADD_MEMBERSHIP, &mr, sizeof(mr)) == -1)
1446			{
1447				snprintf(ebuf, PCAP_ERRBUF_SIZE,
1448					"setsockopt: %s", pcap_strerror(errno));
1449				break;
1450			}
1451		}
1452
1453		/* Save the socket FD in the pcap structure */
1454
1455		handle->fd 	 = sock_fd;
1456
1457		return 1;
1458
1459	} while(0);
1460
1461	if (sock_fd != -1)
1462		close(sock_fd);
1463
1464	if (fatal_err) {
1465		/*
1466		 * Get rid of any link-layer type list we allocated.
1467		 */
1468		if (handle->dlt_list != NULL)
1469			free(handle->dlt_list);
1470		return -2;
1471	} else
1472		return 0;
1473#else
1474	strncpy(ebuf,
1475		"New packet capturing interface not supported by build "
1476		"environment", PCAP_ERRBUF_SIZE);
1477	return 0;
1478#endif
1479}
1480
1481#ifdef HAVE_PF_PACKET_SOCKETS
1482/*
1483 *  Return the index of the given device name. Fill ebuf and return
1484 *  -1 on failure.
1485 */
1486static int
1487iface_get_id(int fd, const char *device, char *ebuf)
1488{
1489	struct ifreq	ifr;
1490
1491	memset(&ifr, 0, sizeof(ifr));
1492	strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
1493
1494	if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
1495		snprintf(ebuf, PCAP_ERRBUF_SIZE,
1496			 "ioctl: %s", pcap_strerror(errno));
1497		return -1;
1498	}
1499
1500	return ifr.ifr_ifindex;
1501}
1502
1503/*
1504 *  Bind the socket associated with FD to the given device.
1505 */
1506static int
1507iface_bind(int fd, int ifindex, char *ebuf)
1508{
1509	struct sockaddr_ll	sll;
1510	int			err;
1511	socklen_t		errlen = sizeof(err);
1512
1513	memset(&sll, 0, sizeof(sll));
1514	sll.sll_family		= AF_PACKET;
1515	sll.sll_ifindex		= ifindex;
1516	sll.sll_protocol	= htons(ETH_P_ALL);
1517
1518	if (bind(fd, (struct sockaddr *) &sll, sizeof(sll)) == -1) {
1519		snprintf(ebuf, PCAP_ERRBUF_SIZE,
1520			 "bind: %s", pcap_strerror(errno));
1521		return -1;
1522	}
1523
1524	/* Any pending errors, e.g., network is down? */
1525
1526	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) {
1527		snprintf(ebuf, PCAP_ERRBUF_SIZE,
1528			"getsockopt: %s", pcap_strerror(errno));
1529		return -2;
1530	}
1531
1532	if (err > 0) {
1533		snprintf(ebuf, PCAP_ERRBUF_SIZE,
1534			"bind: %s", pcap_strerror(err));
1535		return -2;
1536	}
1537
1538	return 0;
1539}
1540
1541#endif
1542
1543
1544/* ===== Functions to interface to the older kernels ================== */
1545
1546/*
1547 * With older kernels promiscuous mode is kind of interesting because we
1548 * have to reset the interface before exiting. The problem can't really
1549 * be solved without some daemon taking care of managing usage counts.
1550 * If we put the interface into promiscuous mode, we set a flag indicating
1551 * that we must take it out of that mode when the interface is closed,
1552 * and, when closing the interface, if that flag is set we take it out
1553 * of promiscuous mode.
1554 */
1555
/*
 * Head of the list of pcaps for which we turned promiscuous mode on
 * by hand; entries are chained through "md.next".  While the list is
 * non-empty we arrange for "pcap_close_all()" to run at exit so that
 * every one of them is closed and promiscuous mode is turned off.
 */
static struct pcap *pcaps_to_close;

/*
 * TRUE once "atexit()" has been called to register "pcap_close_all()".
 */
static int did_atexit;

/*
 * Close every pcap still on the "pcaps_to_close" list.
 *
 * The head is re-read on each pass: "pcap_close_linux()" unlinks a
 * handle from the list, and presumably "pcap_close()" ends up there
 * for these handles - otherwise this loop would not terminate.
 */
static void	pcap_close_all(void)
{
	struct pcap *head;

	for (;;) {
		head = pcaps_to_close;
		if (head == NULL)
			break;
		pcap_close(head);
	}
}
1577
1578static void	pcap_close_linux( pcap_t *handle )
1579{
1580	struct pcap	*p, *prevp;
1581	struct ifreq	ifr;
1582
1583	if (handle->md.clear_promisc) {
1584		/*
1585		 * We put the interface into promiscuous mode; take
1586		 * it out of promiscuous mode.
1587		 *
1588		 * XXX - if somebody else wants it in promiscuous mode,
1589		 * this code cannot know that, so it'll take it out
1590		 * of promiscuous mode.  That's not fixable in 2.0[.x]
1591		 * kernels.
1592		 */
1593		memset(&ifr, 0, sizeof(ifr));
1594		strncpy(ifr.ifr_name, handle->md.device, sizeof(ifr.ifr_name));
1595		if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) {
1596			fprintf(stderr,
1597			    "Can't restore interface flags (SIOCGIFFLAGS failed: %s).\n"
1598			    "Please adjust manually.\n"
1599			    "Hint: This can't happen with Linux >= 2.2.0.\n",
1600			    strerror(errno));
1601		} else {
1602			if (ifr.ifr_flags & IFF_PROMISC) {
1603				/*
1604				 * Promiscuous mode is currently on; turn it
1605				 * off.
1606				 */
1607				ifr.ifr_flags &= ~IFF_PROMISC;
1608				if (ioctl(handle->fd, SIOCSIFFLAGS, &ifr) == -1) {
1609					fprintf(stderr,
1610					    "Can't restore interface flags (SIOCSIFFLAGS failed: %s).\n"
1611					    "Please adjust manually.\n"
1612					    "Hint: This can't happen with Linux >= 2.2.0.\n",
1613					    strerror(errno));
1614				}
1615			}
1616		}
1617
1618		/*
1619		 * Take this pcap out of the list of pcaps for which we
1620		 * have to take the interface out of promiscuous mode.
1621		 */
1622		for (p = pcaps_to_close, prevp = NULL; p != NULL;
1623		    prevp = p, p = p->md.next) {
1624			if (p == handle) {
1625				/*
1626				 * Found it.  Remove it from the list.
1627				 */
1628				if (prevp == NULL) {
1629					/*
1630					 * It was at the head of the list.
1631					 */
1632					pcaps_to_close = p->md.next;
1633				} else {
1634					/*
1635					 * It was in the middle of the list.
1636					 */
1637					prevp->md.next = p->md.next;
1638				}
1639				break;
1640			}
1641		}
1642	}
1643
1644	if (handle->md.device != NULL)
1645		free(handle->md.device);
1646	handle->md.device = NULL;
1647	pcap_close_common(handle);
1648}
1649
1650/*
1651 *  Try to open a packet socket using the old kernel interface.
1652 *  Returns 0 on failure.
1653 *  FIXME: 0 uses to mean success (Sebastian)
1654 */
1655static int
1656live_open_old(pcap_t *handle, const char *device, int promisc,
1657	      int to_ms, char *ebuf)
1658{
1659	int		arptype;
1660	struct ifreq	ifr;
1661
1662	do {
1663		/* Open the socket */
1664
1665		handle->fd = socket(PF_INET, SOCK_PACKET, htons(ETH_P_ALL));
1666		if (handle->fd == -1) {
1667			snprintf(ebuf, PCAP_ERRBUF_SIZE,
1668				 "socket: %s", pcap_strerror(errno));
1669			break;
1670		}
1671
1672		/* It worked - we are using the old interface */
1673		handle->md.sock_packet = 1;
1674
1675		/* ...which means we get the link-layer header. */
1676		handle->md.cooked = 0;
1677
1678		/* Bind to the given device */
1679
1680		if (!device) {
1681		        strncpy(ebuf, "pcap_open_live: The \"any\" device isn't supported on 2.0[.x]-kernel systems",
1682				PCAP_ERRBUF_SIZE);
1683			break;
1684		}
1685		if (iface_bind_old(handle->fd, device, ebuf) == -1)
1686			break;
1687
1688		/*
1689		 * Try to get the link-layer type.
1690		 */
1691		arptype = iface_get_arptype(handle->fd, device, ebuf);
1692		if (arptype == -1)
1693			break;
1694
1695		/*
1696		 * Try to find the DLT_ type corresponding to that
1697		 * link-layer type.
1698		 */
1699		map_arphrd_to_dlt(handle, arptype, 0);
1700		if (handle->linktype == -1) {
1701			snprintf(ebuf, PCAP_ERRBUF_SIZE,
1702				 "unknown arptype %d", arptype);
1703			break;
1704		}
1705
1706		/* Go to promisc mode if requested */
1707
1708		if (promisc) {
1709			memset(&ifr, 0, sizeof(ifr));
1710			strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
1711			if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) {
1712				snprintf(ebuf, PCAP_ERRBUF_SIZE,
1713					 "ioctl: %s", pcap_strerror(errno));
1714				break;
1715			}
1716			if ((ifr.ifr_flags & IFF_PROMISC) == 0) {
1717				/*
1718				 * Promiscuous mode isn't currently on,
1719				 * so turn it on, and remember that
1720				 * we should turn it off when the
1721				 * pcap_t is closed.
1722				 */
1723
1724				/*
1725				 * If we haven't already done so, arrange
1726				 * to have "pcap_close_all()" called when
1727				 * we exit.
1728				 */
1729				if (!did_atexit) {
1730					if (atexit(pcap_close_all) == -1) {
1731						/*
1732						 * "atexit()" failed; don't
1733						 * put the interface in
1734						 * promiscuous mode, just
1735						 * give up.
1736						 */
1737						strncpy(ebuf, "atexit failed",
1738							PCAP_ERRBUF_SIZE);
1739						break;
1740					}
1741					did_atexit = 1;
1742				}
1743
1744				ifr.ifr_flags |= IFF_PROMISC;
1745				if (ioctl(handle->fd, SIOCSIFFLAGS, &ifr) == -1) {
1746				        snprintf(ebuf, PCAP_ERRBUF_SIZE,
1747						 "ioctl: %s",
1748						 pcap_strerror(errno));
1749					break;
1750				}
1751				handle->md.clear_promisc = 1;
1752
1753				/*
1754				 * Add this to the list of pcaps
1755				 * to close when we exit.
1756				 */
1757				handle->md.next = pcaps_to_close;
1758				pcaps_to_close = handle;
1759			}
1760		}
1761
1762		/*
1763		 * Default value for offset to align link-layer payload
1764		 * on a 4-byte boundary.
1765		 */
1766		handle->offset	 = 0;
1767
1768		return 1;
1769
1770	} while (0);
1771
1772	pcap_close_linux(handle);
1773	return 0;
1774}
1775
1776/*
1777 *  Bind the socket associated with FD to the given device using the
1778 *  interface of the old kernels.
1779 */
1780static int
1781iface_bind_old(int fd, const char *device, char *ebuf)
1782{
1783	struct sockaddr	saddr;
1784	int		err;
1785	socklen_t	errlen = sizeof(err);
1786
1787	memset(&saddr, 0, sizeof(saddr));
1788	strncpy(saddr.sa_data, device, sizeof(saddr.sa_data));
1789	if (bind(fd, &saddr, sizeof(saddr)) == -1) {
1790		snprintf(ebuf, PCAP_ERRBUF_SIZE,
1791			 "bind: %s", pcap_strerror(errno));
1792		return -1;
1793	}
1794
1795	/* Any pending errors, e.g., network is down? */
1796
1797	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) {
1798		snprintf(ebuf, PCAP_ERRBUF_SIZE,
1799			"getsockopt: %s", pcap_strerror(errno));
1800		return -1;
1801	}
1802
1803	if (err > 0) {
1804		snprintf(ebuf, PCAP_ERRBUF_SIZE,
1805			"bind: %s", pcap_strerror(err));
1806		return -1;
1807	}
1808
1809	return 0;
1810}
1811
1812
1813/* ===== System calls available on all supported kernels ============== */
1814
1815/*
1816 *  Query the kernel for the MTU of the given interface.
1817 */
1818static int
1819iface_get_mtu(int fd, const char *device, char *ebuf)
1820{
1821	struct ifreq	ifr;
1822
1823	if (!device)
1824		return BIGGER_THAN_ALL_MTUS;
1825
1826	memset(&ifr, 0, sizeof(ifr));
1827	strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
1828
1829	if (ioctl(fd, SIOCGIFMTU, &ifr) == -1) {
1830		snprintf(ebuf, PCAP_ERRBUF_SIZE,
1831			 "ioctl: %s", pcap_strerror(errno));
1832		return -1;
1833	}
1834
1835	return ifr.ifr_mtu;
1836}
1837
1838/*
1839 *  Get the hardware type of the given interface as ARPHRD_xxx constant.
1840 */
1841static int
1842iface_get_arptype(int fd, const char *device, char *ebuf)
1843{
1844	struct ifreq	ifr;
1845
1846	memset(&ifr, 0, sizeof(ifr));
1847	strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
1848
1849	if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1850		snprintf(ebuf, PCAP_ERRBUF_SIZE,
1851			 "ioctl: %s", pcap_strerror(errno));
1852		return -1;
1853	}
1854
1855	return ifr.ifr_hwaddr.sa_family;
1856}
1857
1858#ifdef SO_ATTACH_FILTER
1859static int
1860fix_program(pcap_t *handle, struct sock_fprog *fcode)
1861{
1862	size_t prog_size;
1863	register int i;
1864	register struct bpf_insn *p;
1865	struct bpf_insn *f;
1866	int len;
1867
1868	/*
1869	 * Make a copy of the filter, and modify that copy if
1870	 * necessary.
1871	 */
1872	prog_size = sizeof(*handle->fcode.bf_insns) * handle->fcode.bf_len;
1873	len = handle->fcode.bf_len;
1874	f = (struct bpf_insn *)malloc(prog_size);
1875	if (f == NULL) {
1876		snprintf(handle->errbuf, sizeof(handle->errbuf),
1877			 "malloc: %s", pcap_strerror(errno));
1878		return -1;
1879	}
1880	memcpy(f, handle->fcode.bf_insns, prog_size);
1881	fcode->len = len;
1882	fcode->filter = (struct sock_filter *) f;
1883
1884	for (i = 0; i < len; ++i) {
1885		p = &f[i];
1886		/*
1887		 * What type of instruction is this?
1888		 */
1889		switch (BPF_CLASS(p->code)) {
1890
1891		case BPF_RET:
1892			/*
1893			 * It's a return instruction; is the snapshot
1894			 * length a constant, rather than the contents
1895			 * of the accumulator?
1896			 */
1897			if (BPF_MODE(p->code) == BPF_K) {
1898				/*
1899				 * Yes - if the value to be returned,
1900				 * i.e. the snapshot length, is anything
1901				 * other than 0, make it 65535, so that
1902				 * the packet is truncated by "recvfrom()",
1903				 * not by the filter.
1904				 *
1905				 * XXX - there's nothing we can easily do
1906				 * if it's getting the value from the
1907				 * accumulator; we'd have to insert
1908				 * code to force non-zero values to be
1909				 * 65535.
1910				 */
1911				if (p->k != 0)
1912					p->k = 65535;
1913			}
1914			break;
1915
1916		case BPF_LD:
1917		case BPF_LDX:
1918			/*
1919			 * It's a load instruction; is it loading
1920			 * from the packet?
1921			 */
1922			switch (BPF_MODE(p->code)) {
1923
1924			case BPF_ABS:
1925			case BPF_IND:
1926			case BPF_MSH:
1927				/*
1928				 * Yes; are we in cooked mode?
1929				 */
1930				if (handle->md.cooked) {
1931					/*
1932					 * Yes, so we need to fix this
1933					 * instruction.
1934					 */
1935					if (fix_offset(p) < 0) {
1936						/*
1937						 * We failed to do so.
1938						 * Return 0, so our caller
1939						 * knows to punt to userland.
1940						 */
1941						return 0;
1942					}
1943				}
1944				break;
1945			}
1946			break;
1947		}
1948	}
1949	return 1;	/* we succeeded */
1950}
1951
1952static int
1953fix_offset(struct bpf_insn *p)
1954{
1955	/*
1956	 * What's the offset?
1957	 */
1958	if (p->k >= SLL_HDR_LEN) {
1959		/*
1960		 * It's within the link-layer payload; that starts at an
1961		 * offset of 0, as far as the kernel packet filter is
1962		 * concerned, so subtract the length of the link-layer
1963		 * header.
1964		 */
1965		p->k -= SLL_HDR_LEN;
1966	} else if (p->k == 14) {
1967		/*
1968		 * It's the protocol field; map it to the special magic
1969		 * kernel offset for that field.
1970		 */
1971		p->k = SKF_AD_OFF + SKF_AD_PROTOCOL;
1972	} else {
1973		/*
1974		 * It's within the header, but it's not one of those
1975		 * fields; we can't do that in the kernel, so punt
1976		 * to userland.
1977		 */
1978		return -1;
1979	}
1980	return 0;
1981}
1982
1983static int
1984set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode)
1985{
1986	int total_filter_on = 0;
1987	int save_mode;
1988	int ret;
1989	int save_errno;
1990
1991	/*
1992	 * The socket filter code doesn't discard all packets queued
1993	 * up on the socket when the filter is changed; this means
1994	 * that packets that don't match the new filter may show up
1995	 * after the new filter is put onto the socket, if those
1996	 * packets haven't yet been read.
1997	 *
1998	 * This means, for example, that if you do a tcpdump capture
1999	 * with a filter, the first few packets in the capture might
2000	 * be packets that wouldn't have passed the filter.
2001	 *
2002	 * We therefore discard all packets queued up on the socket
2003	 * when setting a kernel filter.  (This isn't an issue for
2004	 * userland filters, as the userland filtering is done after
2005	 * packets are queued up.)
2006	 *
2007	 * To flush those packets, we put the socket in read-only mode,
2008	 * and read packets from the socket until there are no more to
2009	 * read.
2010	 *
2011	 * In order to keep that from being an infinite loop - i.e.,
2012	 * to keep more packets from arriving while we're draining
2013	 * the queue - we put the "total filter", which is a filter
2014	 * that rejects all packets, onto the socket before draining
2015	 * the queue.
2016	 *
2017	 * This code deliberately ignores any errors, so that you may
2018	 * get bogus packets if an error occurs, rather than having
2019	 * the filtering done in userland even if it could have been
2020	 * done in the kernel.
2021	 */
2022	if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
2023		       &total_fcode, sizeof(total_fcode)) == 0) {
2024		char drain[1];
2025
2026		/*
2027		 * Note that we've put the total filter onto the socket.
2028		 */
2029		total_filter_on = 1;
2030
2031		/*
2032		 * Save the socket's current mode, and put it in
2033		 * non-blocking mode; we drain it by reading packets
2034		 * until we get an error (which is normally a
2035		 * "nothing more to be read" error).
2036		 */
2037		save_mode = fcntl(handle->fd, F_GETFL, 0);
2038		if (save_mode != -1 &&
2039		    fcntl(handle->fd, F_SETFL, save_mode | O_NONBLOCK) >= 0) {
2040			while (recv(handle->fd, &drain, sizeof drain,
2041			       MSG_TRUNC) >= 0)
2042				;
2043			save_errno = errno;
2044			fcntl(handle->fd, F_SETFL, save_mode);
2045			if (save_errno != EAGAIN) {
2046				/* Fatal error */
2047				reset_kernel_filter(handle);
2048				snprintf(handle->errbuf, sizeof(handle->errbuf),
2049				 "recv: %s", pcap_strerror(save_errno));
2050				return -2;
2051			}
2052		}
2053	}
2054
2055	/*
2056	 * Now attach the new filter.
2057	 */
2058	ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
2059			 fcode, sizeof(*fcode));
2060	if (ret == -1 && total_filter_on) {
2061		/*
2062		 * Well, we couldn't set that filter on the socket,
2063		 * but we could set the total filter on the socket.
2064		 *
2065		 * This could, for example, mean that the filter was
2066		 * too big to put into the kernel, so we'll have to
2067		 * filter in userland; in any case, we'll be doing
2068		 * filtering in userland, so we need to remove the
2069		 * total filter so we see packets.
2070		 */
2071		save_errno = errno;
2072
2073		/*
2074		 * XXX - if this fails, we're really screwed;
2075		 * we have the total filter on the socket,
2076		 * and it won't come off.  What do we do then?
2077		 */
2078		reset_kernel_filter(handle);
2079
2080		errno = save_errno;
2081	}
2082	return ret;
2083}
2084
2085static int
2086reset_kernel_filter(pcap_t *handle)
2087{
2088	/* setsockopt() barfs unless it get a dummy parameter */
2089	int dummy;
2090
2091	return setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER,
2092				   &dummy, sizeof(dummy));
2093}
2094#endif
2095