1/*-
2 * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved.
3 * Copyright (c) 2000 Darrell Anderson
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28/*
29 * netdump_client.c
30 * FreeBSD subsystem supporting netdump network dumps.
31 * A dedicated server must be running to accept client dumps.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD$");
36
37#include <sys/param.h>
38#include <sys/conf.h>
39#include <sys/disk.h>
40#include <sys/endian.h>
41#include <sys/jail.h>
42#include <sys/kernel.h>
43#include <sys/kerneldump.h>
44#include <sys/mbuf.h>
45#include <sys/module.h>
46#include <sys/priv.h>
47#include <sys/proc.h>
48#include <sys/protosw.h>
49#include <sys/socket.h>
50#include <sys/sysctl.h>
51#include <sys/systm.h>
52
53#include <net/ethernet.h>
54#include <net/if.h>
55#include <net/if_arp.h>
56#include <net/if_dl.h>
57#include <net/if_types.h>
58#include <net/if_var.h>
59
60#include <netinet/in.h>
61#include <netinet/in_systm.h>
62#include <netinet/in_var.h>
63#include <netinet/ip.h>
64#include <netinet/ip_var.h>
65#include <netinet/ip_options.h>
66#include <netinet/udp.h>
67#include <netinet/udp_var.h>
68#include <netinet/netdump/netdump.h>
69
70#include <machine/in_cksum.h>
71#include <machine/pcb.h>
72
/*
 * Debug logging helpers.  Every message is prefixed with the name of the
 * calling function and is gated at run time on nd_debug:
 *	NETDDEBUG, NETDDEBUG_IF		printed when nd_debug > 0
 *	NETDDEBUGV, NETDDEBUGV_IF	printed when nd_debug > 1
 * The _IF variants log through if_printf() on the given interface.
 */
#define	ND_DEBUG_PRINTF(lvl, f, ...) do {				\
	if (nd_debug > (lvl))						\
		printf(("%s: " f), __func__, ## __VA_ARGS__);		\
} while (0)
#define	ND_DEBUG_IF_PRINTF(lvl, i, f, ...) do {				\
	if (nd_debug > (lvl))						\
		if_printf((i), ("%s: " f), __func__, ## __VA_ARGS__);	\
} while (0)

#define	NETDDEBUG(f, ...)						\
	ND_DEBUG_PRINTF(0, f, ## __VA_ARGS__)
#define	NETDDEBUG_IF(i, f, ...)						\
	ND_DEBUG_IF_PRINTF(0, i, f, ## __VA_ARGS__)
#define	NETDDEBUGV(f, ...)						\
	ND_DEBUG_PRINTF(1, f, ## __VA_ARGS__)
#define	NETDDEBUGV_IF(i, f, ...)					\
	ND_DEBUG_IF_PRINTF(1, i, f, ## __VA_ARGS__)
89
90static int	 netdump_arp_gw(void);
91static void	 netdump_cleanup(void);
92static int	 netdump_configure(struct netdump_conf *, struct thread *);
93static int	 netdump_dumper(void *priv __unused, void *virtual,
94		    vm_offset_t physical __unused, off_t offset, size_t length);
95static int	 netdump_ether_output(struct mbuf *m, struct ifnet *ifp,
96		    struct ether_addr dst, u_short etype);
97static void	 netdump_handle_arp(struct mbuf **mb);
98static void	 netdump_handle_ip(struct mbuf **mb);
99static int	 netdump_ioctl(struct cdev *dev __unused, u_long cmd,
100		    caddr_t addr, int flags __unused, struct thread *td);
101static int	 netdump_modevent(module_t mod, int type, void *priv);
102static void	 netdump_network_poll(void);
103static void	 netdump_pkt_in(struct ifnet *ifp, struct mbuf *m);
104static int	 netdump_send(uint32_t type, off_t offset, unsigned char *data,
105		    uint32_t datalen);
106static int	 netdump_send_arp(in_addr_t dst);
107static int	 netdump_start(struct dumperinfo *di);
108static int	 netdump_udp_output(struct mbuf *m);
109
110/* Must be at least as big as the chunks dumpsys() gives us. */
111static unsigned char nd_buf[MAXDUMPPGS * PAGE_SIZE];
112static uint32_t nd_seqno;
113static int dump_failed, have_gw_mac;
114static void (*drv_if_input)(struct ifnet *, struct mbuf *);
115static int restore_gw_addr;
116
117static uint64_t rcvd_acks;
118CTASSERT(sizeof(rcvd_acks) * NBBY == NETDUMP_MAX_IN_FLIGHT);
119
120/* Configuration parameters. */
121static struct netdump_conf nd_conf;
122#define	nd_server	nd_conf.ndc_server
123#define	nd_client	nd_conf.ndc_client
124#define	nd_gateway	nd_conf.ndc_gateway
125
126/* General dynamic settings. */
127static struct ether_addr nd_gw_mac;
128static struct ifnet *nd_ifp;
129static uint16_t nd_server_port = NETDUMP_PORT;
130
131FEATURE(netdump, "Netdump client support");
132
133static SYSCTL_NODE(_net, OID_AUTO, netdump, CTLFLAG_RD, NULL,
134    "netdump parameters");
135
136static int nd_debug;
137SYSCTL_INT(_net_netdump, OID_AUTO, debug, CTLFLAG_RWTUN,
138    &nd_debug, 0,
139    "Debug message verbosity");
140static int nd_enabled;
141SYSCTL_INT(_net_netdump, OID_AUTO, enabled, CTLFLAG_RD,
142    &nd_enabled, 0,
143    "netdump configuration status");
144static char nd_path[MAXPATHLEN];
145SYSCTL_STRING(_net_netdump, OID_AUTO, path, CTLFLAG_RW,
146    nd_path, sizeof(nd_path),
147    "Server path for output files");
148static int nd_polls = 2000;
149SYSCTL_INT(_net_netdump, OID_AUTO, polls, CTLFLAG_RWTUN,
150    &nd_polls, 0,
151    "Number of times to poll before assuming packet loss (0.5ms per poll)");
152static int nd_retries = 10;
153SYSCTL_INT(_net_netdump, OID_AUTO, retries, CTLFLAG_RWTUN,
154    &nd_retries, 0,
155    "Number of retransmit attempts before giving up");
156static int nd_arp_retries = 3;
157SYSCTL_INT(_net_netdump, OID_AUTO, arp_retries, CTLFLAG_RWTUN,
158    &nd_arp_retries, 0,
159    "Number of ARP attempts before giving up");
160
161/*
162 * Checks for netdump support on a network interface
163 *
164 * Parameters:
165 *	ifp	The network interface that is being tested for support
166 *
167 * Returns:
168 *	int	1 if the interface is supported, 0 if not
169 */
170static bool
171netdump_supported_nic(struct ifnet *ifp)
172{
173
174	return (ifp->if_netdump_methods != NULL);
175}
176
/*-
 * Network specific primitives.
 * In the code that follows they are ordered as:
 * - Packet buffer primitives
 * - Output primitives
 * - Input primitives
 * - Polling primitives
 */
185
186/*
187 * Handles creation of the ethernet header, then places outgoing packets into
188 * the tx buffer for the NIC
189 *
190 * Parameters:
191 *	m	The mbuf containing the packet to be sent (will be freed by
192 *		this function or the NIC driver)
193 *	ifp	The interface to send on
194 *	dst	The destination ethernet address (source address will be looked
195 *		up using ifp)
196 *	etype	The ETHERTYPE_* value for the protocol that is being sent
197 *
198 * Returns:
199 *	int	see errno.h, 0 for success
200 */
201static int
202netdump_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst,
203    u_short etype)
204{
205	struct ether_header *eh;
206
207	if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) ||
208	    (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) {
209		if_printf(ifp, "netdump_ether_output: interface isn't up\n");
210		m_freem(m);
211		return (ENETDOWN);
212	}
213
214	/* Fill in the ethernet header. */
215	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
216	if (m == NULL) {
217		printf("%s: out of mbufs\n", __func__);
218		return (ENOBUFS);
219	}
220	eh = mtod(m, struct ether_header *);
221	memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
222	memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN);
223	eh->ether_type = htons(etype);
224	return ((ifp->if_netdump_methods->nd_transmit)(ifp, m));
225}
226
227/*
228 * Unreliable transmission of an mbuf chain to the netdump server
229 * Note: can't handle fragmentation; fails if the packet is larger than
230 *	 nd_ifp->if_mtu after adding the UDP/IP headers
231 *
232 * Parameters:
233 *	m	mbuf chain
234 *
235 * Returns:
236 *	int	see errno.h, 0 for success
237 */
238static int
239netdump_udp_output(struct mbuf *m)
240{
241	struct udpiphdr *ui;
242	struct ip *ip;
243
244	MPASS(nd_ifp != NULL);
245
246	M_PREPEND(m, sizeof(struct udpiphdr), M_NOWAIT);
247	if (m == NULL) {
248		printf("%s: out of mbufs\n", __func__);
249		return (ENOBUFS);
250	}
251
252	if (m->m_pkthdr.len > nd_ifp->if_mtu) {
253		printf("netdump_udp_output: Packet is too big: %d > MTU %u\n",
254		    m->m_pkthdr.len, nd_ifp->if_mtu);
255		m_freem(m);
256		return (ENOBUFS);
257	}
258
259	ui = mtod(m, struct udpiphdr *);
260	bzero(ui->ui_x1, sizeof(ui->ui_x1));
261	ui->ui_pr = IPPROTO_UDP;
262	ui->ui_len = htons(m->m_pkthdr.len - sizeof(struct ip));
263	ui->ui_ulen = ui->ui_len;
264	ui->ui_src = nd_client;
265	ui->ui_dst = nd_server;
266	/* Use this src port so that the server can connect() the socket */
267	ui->ui_sport = htons(NETDUMP_ACKPORT);
268	ui->ui_dport = htons(nd_server_port);
269	ui->ui_sum = 0;
270	if ((ui->ui_sum = in_cksum(m, m->m_pkthdr.len)) == 0)
271		ui->ui_sum = 0xffff;
272
273	ip = mtod(m, struct ip *);
274	ip->ip_v = IPVERSION;
275	ip->ip_hl = sizeof(struct ip) >> 2;
276	ip->ip_tos = 0;
277	ip->ip_len = htons(m->m_pkthdr.len);
278	ip->ip_id = 0;
279	ip->ip_off = htons(IP_DF);
280	ip->ip_ttl = 255;
281	ip->ip_sum = 0;
282	ip->ip_sum = in_cksum(m, sizeof(struct ip));
283
284	return (netdump_ether_output(m, nd_ifp, nd_gw_mac, ETHERTYPE_IP));
285}
286
287/*
288 * Builds and sends a single ARP request to locate the server
289 *
290 * Return value:
291 *	0 on success
292 *	errno on error
293 */
294static int
295netdump_send_arp(in_addr_t dst)
296{
297	struct ether_addr bcast;
298	struct mbuf *m;
299	struct arphdr *ah;
300	int pktlen;
301
302	MPASS(nd_ifp != NULL);
303
304	/* Fill-up a broadcast address. */
305	memset(&bcast, 0xFF, ETHER_ADDR_LEN);
306	m = m_gethdr(M_NOWAIT, MT_DATA);
307	if (m == NULL) {
308		printf("netdump_send_arp: Out of mbufs\n");
309		return (ENOBUFS);
310	}
311	pktlen = arphdr_len2(ETHER_ADDR_LEN, sizeof(struct in_addr));
312	m->m_len = pktlen;
313	m->m_pkthdr.len = pktlen;
314	MH_ALIGN(m, pktlen);
315	ah = mtod(m, struct arphdr *);
316	ah->ar_hrd = htons(ARPHRD_ETHER);
317	ah->ar_pro = htons(ETHERTYPE_IP);
318	ah->ar_hln = ETHER_ADDR_LEN;
319	ah->ar_pln = sizeof(struct in_addr);
320	ah->ar_op = htons(ARPOP_REQUEST);
321	memcpy(ar_sha(ah), IF_LLADDR(nd_ifp), ETHER_ADDR_LEN);
322	((struct in_addr *)ar_spa(ah))->s_addr = nd_client.s_addr;
323	bzero(ar_tha(ah), ETHER_ADDR_LEN);
324	((struct in_addr *)ar_tpa(ah))->s_addr = dst;
325	return (netdump_ether_output(m, nd_ifp, bcast, ETHERTYPE_ARP));
326}
327
328/*
329 * Sends ARP requests to locate the server and waits for a response.
330 * We first try to ARP the server itself, and fall back to the provided
331 * gateway if the server appears to be off-link.
332 *
333 * Return value:
334 *	0 on success
335 *	errno on error
336 */
337static int
338netdump_arp_gw(void)
339{
340	in_addr_t dst;
341	int error, polls, retries;
342
343	dst = nd_server.s_addr;
344restart:
345	for (retries = 0; retries < nd_arp_retries && have_gw_mac == 0;
346	    retries++) {
347		error = netdump_send_arp(dst);
348		if (error != 0)
349			return (error);
350		for (polls = 0; polls < nd_polls && have_gw_mac == 0; polls++) {
351			netdump_network_poll();
352			DELAY(500);
353		}
354		if (have_gw_mac == 0)
355			printf("(ARP retry)");
356	}
357	if (have_gw_mac != 0)
358		return (0);
359	if (dst == nd_server.s_addr && nd_server.s_addr != nd_gateway.s_addr) {
360		printf("Failed to ARP server, trying to reach gateway...\n");
361		dst = nd_gateway.s_addr;
362		goto restart;
363	}
364
365	printf("\nARP timed out.\n");
366	return (ETIMEDOUT);
367}
368
369/*
370 * Dummy free function for netdump clusters.
371 */
372static void
373netdump_mbuf_free(struct mbuf *m __unused)
374{
375}
376
377/*
378 * Construct and reliably send a netdump packet.  May fail from a resource
379 * shortage or extreme number of unacknowledged retransmissions.  Wait for
380 * an acknowledgement before returning.  Splits packets into chunks small
381 * enough to be sent without fragmentation (looks up the interface MTU)
382 *
383 * Parameters:
384 *	type	netdump packet type (HERALD, FINISHED, or VMCORE)
385 *	offset	vmcore data offset (bytes)
386 *	data	vmcore data
387 *	datalen	vmcore data size (bytes)
388 *
389 * Returns:
390 *	int see errno.h, 0 for success
391 */
392static int
393netdump_send(uint32_t type, off_t offset, unsigned char *data, uint32_t datalen)
394{
395	struct netdump_msg_hdr *nd_msg_hdr;
396	struct mbuf *m, *m2;
397	uint64_t want_acks;
398	uint32_t i, pktlen, sent_so_far;
399	int retries, polls, error;
400
401	want_acks = 0;
402	rcvd_acks = 0;
403	retries = 0;
404
405	MPASS(nd_ifp != NULL);
406
407retransmit:
408	/* Chunks can be too big to fit in packets. */
409	for (i = sent_so_far = 0; sent_so_far < datalen ||
410	    (i == 0 && datalen == 0); i++) {
411		pktlen = datalen - sent_so_far;
412
413		/* First bound: the packet structure. */
414		pktlen = min(pktlen, NETDUMP_DATASIZE);
415
416		/* Second bound: the interface MTU (assume no IP options). */
417		pktlen = min(pktlen, nd_ifp->if_mtu - sizeof(struct udpiphdr) -
418		    sizeof(struct netdump_msg_hdr));
419
420		/*
421		 * Check if it is retransmitting and this has been ACKed
422		 * already.
423		 */
424		if ((rcvd_acks & (1 << i)) != 0) {
425			sent_so_far += pktlen;
426			continue;
427		}
428
429		/*
430		 * Get and fill a header mbuf, then chain data as an extended
431		 * mbuf.
432		 */
433		m = m_gethdr(M_NOWAIT, MT_DATA);
434		if (m == NULL) {
435			printf("netdump_send: Out of mbufs\n");
436			return (ENOBUFS);
437		}
438		m->m_len = sizeof(struct netdump_msg_hdr);
439		m->m_pkthdr.len = sizeof(struct netdump_msg_hdr);
440		MH_ALIGN(m, sizeof(struct netdump_msg_hdr));
441		nd_msg_hdr = mtod(m, struct netdump_msg_hdr *);
442		nd_msg_hdr->mh_seqno = htonl(nd_seqno + i);
443		nd_msg_hdr->mh_type = htonl(type);
444		nd_msg_hdr->mh_offset = htobe64(offset + sent_so_far);
445		nd_msg_hdr->mh_len = htonl(pktlen);
446		nd_msg_hdr->mh__pad = 0;
447
448		if (pktlen != 0) {
449			m2 = m_get(M_NOWAIT, MT_DATA);
450			if (m2 == NULL) {
451				m_freem(m);
452				printf("netdump_send: Out of mbufs\n");
453				return (ENOBUFS);
454			}
455			MEXTADD(m2, data + sent_so_far, pktlen,
456			    netdump_mbuf_free, NULL, NULL, 0, EXT_DISPOSABLE);
457			m2->m_len = pktlen;
458
459			m_cat(m, m2);
460			m->m_pkthdr.len += pktlen;
461		}
462		error = netdump_udp_output(m);
463		if (error != 0)
464			return (error);
465
466		/* Note that we're waiting for this packet in the bitfield. */
467		want_acks |= (1 << i);
468		sent_so_far += pktlen;
469	}
470	if (i >= NETDUMP_MAX_IN_FLIGHT)
471		printf("Warning: Sent more than %d packets (%d). "
472		    "Acknowledgements will fail unless the size of "
473		    "rcvd_acks/want_acks is increased.\n",
474		    NETDUMP_MAX_IN_FLIGHT, i);
475
476	/*
477	 * Wait for acks.  A *real* window would speed things up considerably.
478	 */
479	polls = 0;
480	while (rcvd_acks != want_acks) {
481		if (polls++ > nd_polls) {
482			if (retries++ > nd_retries)
483				return (ETIMEDOUT);
484			printf(". ");
485			goto retransmit;
486		}
487		netdump_network_poll();
488		DELAY(500);
489	}
490	nd_seqno += i;
491	return (0);
492}
493
494/*
495 * Handler for IP packets: checks their sanity and then processes any netdump
496 * ACK packets it finds.
497 *
498 * It needs to replicate partially the behaviour of ip_input() and
499 * udp_input().
500 *
501 * Parameters:
502 *	mb	a pointer to an mbuf * containing the packet received
503 *		Updates *mb if m_pullup et al change the pointer
504 *		Assumes the calling function will take care of freeing the mbuf
505 */
506static void
507netdump_handle_ip(struct mbuf **mb)
508{
509	struct ip *ip;
510	struct udpiphdr *udp;
511	struct netdump_ack *nd_ack;
512	struct mbuf *m;
513	int rcv_ackno;
514	unsigned short hlen;
515
516	/* IP processing. */
517	m = *mb;
518	if (m->m_pkthdr.len < sizeof(struct ip)) {
519		NETDDEBUG("dropping packet too small for IP header\n");
520		return;
521	}
522	if (m->m_len < sizeof(struct ip)) {
523		m = m_pullup(m, sizeof(struct ip));
524		*mb = m;
525		if (m == NULL) {
526			NETDDEBUG("m_pullup failed\n");
527			return;
528		}
529	}
530	ip = mtod(m, struct ip *);
531
532	/* IP version. */
533	if (ip->ip_v != IPVERSION) {
534		NETDDEBUG("bad IP version %d\n", ip->ip_v);
535		return;
536	}
537
538	/* Header length. */
539	hlen = ip->ip_hl << 2;
540	if (hlen < sizeof(struct ip)) {
541		NETDDEBUG("bad IP header length (%hu)\n", hlen);
542		return;
543	}
544	if (hlen > m->m_len) {
545		m = m_pullup(m, hlen);
546		*mb = m;
547		if (m == NULL) {
548			NETDDEBUG("m_pullup failed\n");
549			return;
550		}
551		ip = mtod(m, struct ip *);
552	}
553	/* Ignore packets with IP options. */
554	if (hlen > sizeof(struct ip)) {
555		NETDDEBUG("drop packet with IP options\n");
556		return;
557	}
558
559#ifdef INVARIANTS
560	if ((IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) ||
561	    IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) &&
562	    (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
563		NETDDEBUG("Bad IP header (RFC1122)\n");
564		return;
565	}
566#endif
567
568	/* Checksum. */
569	if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
570		if ((m->m_pkthdr.csum_flags & CSUM_IP_VALID) == 0) {
571			NETDDEBUG("bad IP checksum\n");
572			return;
573		}
574	} else {
575		/* XXX */ ;
576	}
577
578	/* Convert fields to host byte order. */
579	ip->ip_len = ntohs(ip->ip_len);
580	if (ip->ip_len < hlen) {
581		NETDDEBUG("IP packet smaller (%hu) than header (%hu)\n",
582		    ip->ip_len, hlen);
583		return;
584	}
585	if (m->m_pkthdr.len < ip->ip_len) {
586		NETDDEBUG("IP packet bigger (%hu) than ethernet packet (%d)\n",
587		    ip->ip_len, m->m_pkthdr.len);
588		return;
589	}
590	if (m->m_pkthdr.len > ip->ip_len) {
591
592		/* Truncate the packet to the IP length. */
593		if (m->m_len == m->m_pkthdr.len) {
594			m->m_len = ip->ip_len;
595			m->m_pkthdr.len = ip->ip_len;
596		} else
597			m_adj(m, ip->ip_len - m->m_pkthdr.len);
598	}
599
600	ip->ip_off = ntohs(ip->ip_off);
601
602	/* Check that the source is the server's IP. */
603	if (ip->ip_src.s_addr != nd_server.s_addr) {
604		NETDDEBUG("drop packet not from server (from 0x%x)\n",
605		    ip->ip_src.s_addr);
606		return;
607	}
608
609	/* Check if the destination IP is ours. */
610	if (ip->ip_dst.s_addr != nd_client.s_addr) {
611		NETDDEBUGV("drop packet not to our IP\n");
612		return;
613	}
614
615	if (ip->ip_p != IPPROTO_UDP) {
616		NETDDEBUG("drop non-UDP packet\n");
617		return;
618	}
619
620	/* Do not deal with fragments. */
621	if ((ip->ip_off & (IP_MF | IP_OFFMASK)) != 0) {
622		NETDDEBUG("drop fragmented packet\n");
623		return;
624	}
625
626	/* UDP custom is to have packet length not include IP header. */
627	ip->ip_len -= hlen;
628
629	/* UDP processing. */
630
631	/* Get IP and UDP headers together, along with the netdump packet. */
632	if (m->m_pkthdr.len <
633	    sizeof(struct udpiphdr) + sizeof(struct netdump_ack)) {
634		NETDDEBUG("ignoring small packet\n");
635		return;
636	}
637	if (m->m_len < sizeof(struct udpiphdr) + sizeof(struct netdump_ack)) {
638		m = m_pullup(m, sizeof(struct udpiphdr) +
639		    sizeof(struct netdump_ack));
640		*mb = m;
641		if (m == NULL) {
642			NETDDEBUG("m_pullup failed\n");
643			return;
644		}
645	}
646	udp = mtod(m, struct udpiphdr *);
647
648	if (ntohs(udp->ui_u.uh_dport) != NETDUMP_ACKPORT) {
649		NETDDEBUG("not on the netdump port.\n");
650		return;
651	}
652
653	/* Netdump processing. */
654
655	/*
656	 * Packet is meant for us.  Extract the ack sequence number and the
657	 * port number if necessary.
658	 */
659	nd_ack = (struct netdump_ack *)(mtod(m, caddr_t) +
660	    sizeof(struct udpiphdr));
661	rcv_ackno = ntohl(nd_ack->na_seqno);
662	if (nd_server_port == NETDUMP_PORT)
663		nd_server_port = ntohs(udp->ui_u.uh_sport);
664	if (rcv_ackno >= nd_seqno + NETDUMP_MAX_IN_FLIGHT)
665		printf("%s: ACK %d too far in future!\n", __func__, rcv_ackno);
666	else if (rcv_ackno >= nd_seqno) {
667		/* We're interested in this ack. Record it. */
668		rcvd_acks |= 1 << (rcv_ackno - nd_seqno);
669	}
670}
671
672/*
673 * Handler for ARP packets: checks their sanity and then
674 * 1. If the ARP is a request for our IP, respond with our MAC address
675 * 2. If the ARP is a response from our server, record its MAC address
676 *
677 * It needs to replicate partially the behaviour of arpintr() and
678 * in_arpinput().
679 *
680 * Parameters:
681 *	mb	a pointer to an mbuf * containing the packet received
682 *		Updates *mb if m_pullup et al change the pointer
683 *		Assumes the calling function will take care of freeing the mbuf
684 */
685static void
686netdump_handle_arp(struct mbuf **mb)
687{
688	char buf[INET_ADDRSTRLEN];
689	struct in_addr isaddr, itaddr, myaddr;
690	struct ether_addr dst;
691	struct mbuf *m;
692	struct arphdr *ah;
693	struct ifnet *ifp;
694	uint8_t *enaddr;
695	int req_len, op;
696
697	m = *mb;
698	ifp = m->m_pkthdr.rcvif;
699	if (m->m_len < sizeof(struct arphdr)) {
700		m = m_pullup(m, sizeof(struct arphdr));
701		*mb = m;
702		if (m == NULL) {
703			NETDDEBUG("runt packet: m_pullup failed\n");
704			return;
705		}
706	}
707
708	ah = mtod(m, struct arphdr *);
709	if (ntohs(ah->ar_hrd) != ARPHRD_ETHER) {
710		NETDDEBUG("unknown hardware address 0x%2D)\n",
711		    (unsigned char *)&ah->ar_hrd, "");
712		return;
713	}
714	if (ntohs(ah->ar_pro) != ETHERTYPE_IP) {
715		NETDDEBUG("drop ARP for unknown protocol %d\n",
716		    ntohs(ah->ar_pro));
717		return;
718	}
719	req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
720	if (m->m_len < req_len) {
721		m = m_pullup(m, req_len);
722		*mb = m;
723		if (m == NULL) {
724			NETDDEBUG("runt packet: m_pullup failed\n");
725			return;
726		}
727	}
728	ah = mtod(m, struct arphdr *);
729
730	op = ntohs(ah->ar_op);
731	memcpy(&isaddr, ar_spa(ah), sizeof(isaddr));
732	memcpy(&itaddr, ar_tpa(ah), sizeof(itaddr));
733	enaddr = (uint8_t *)IF_LLADDR(ifp);
734	myaddr = nd_client;
735
736	if (memcmp(ar_sha(ah), enaddr, ifp->if_addrlen) == 0) {
737		NETDDEBUG("ignoring ARP from myself\n");
738		return;
739	}
740
741	if (isaddr.s_addr == nd_client.s_addr) {
742		printf("%s: %*D is using my IP address %s!\n", __func__,
743		    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
744		    inet_ntoa_r(isaddr, buf));
745		return;
746	}
747
748	if (memcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen) == 0) {
749		NETDDEBUG("ignoring ARP from broadcast address\n");
750		return;
751	}
752
753	if (op == ARPOP_REPLY) {
754		if (isaddr.s_addr != nd_gateway.s_addr &&
755		    isaddr.s_addr != nd_server.s_addr) {
756			inet_ntoa_r(isaddr, buf);
757			NETDDEBUG(
758			    "ignoring ARP reply from %s (not netdump server)\n",
759			    buf);
760			return;
761		}
762		memcpy(nd_gw_mac.octet, ar_sha(ah),
763		    min(ah->ar_hln, ETHER_ADDR_LEN));
764		have_gw_mac = 1;
765		NETDDEBUG("got server MAC address %6D\n", nd_gw_mac.octet, ":");
766		return;
767	}
768
769	if (op != ARPOP_REQUEST) {
770		NETDDEBUG("ignoring ARP non-request/reply\n");
771		return;
772	}
773
774	if (itaddr.s_addr != nd_client.s_addr) {
775		NETDDEBUG("ignoring ARP not to our IP\n");
776		return;
777	}
778
779	memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
780	memcpy(ar_sha(ah), enaddr, ah->ar_hln);
781	memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
782	memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
783	ah->ar_op = htons(ARPOP_REPLY);
784	ah->ar_pro = htons(ETHERTYPE_IP);
785	m->m_flags &= ~(M_BCAST|M_MCAST);
786	m->m_len = arphdr_len(ah);
787	m->m_pkthdr.len = m->m_len;
788
789	memcpy(dst.octet, ar_tha(ah), ETHER_ADDR_LEN);
790	netdump_ether_output(m, ifp, dst, ETHERTYPE_ARP);
791	*mb = NULL;
792}
793
794/*
795 * Handler for incoming packets directly from the network adapter
796 * Identifies the packet type (IP or ARP) and passes it along to one of the
797 * helper functions netdump_handle_ip or netdump_handle_arp.
798 *
799 * It needs to replicate partially the behaviour of ether_input() and
800 * ether_demux().
801 *
802 * Parameters:
803 *	ifp	the interface the packet came from (should be nd_ifp)
804 *	m	an mbuf containing the packet received
805 */
806static void
807netdump_pkt_in(struct ifnet *ifp, struct mbuf *m)
808{
809	struct ifreq ifr;
810	struct ether_header *eh;
811	u_short etype;
812
813	/* Ethernet processing. */
814	if ((m->m_flags & M_PKTHDR) == 0) {
815		NETDDEBUG_IF(ifp, "discard frame without packet header\n");
816		goto done;
817	}
818	if (m->m_len < ETHER_HDR_LEN) {
819		NETDDEBUG_IF(ifp,
820	    "discard frame without leading eth header (len %u pktlen %u)\n",
821		    m->m_len, m->m_pkthdr.len);
822		goto done;
823	}
824	if ((m->m_flags & M_HASFCS) != 0) {
825		m_adj(m, -ETHER_CRC_LEN);
826		m->m_flags &= ~M_HASFCS;
827	}
828	eh = mtod(m, struct ether_header *);
829	etype = ntohs(eh->ether_type);
830	if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) {
831		NETDDEBUG_IF(ifp, "ignoring vlan packets\n");
832		goto done;
833	}
834	if (if_gethwaddr(ifp, &ifr) != 0) {
835		NETDDEBUG_IF(ifp, "failed to get hw addr for interface\n");
836		goto done;
837	}
838	if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost,
839	    ETHER_ADDR_LEN) != 0) {
840		NETDDEBUG_IF(ifp,
841		    "discard frame with incorrect destination addr\n");
842		goto done;
843	}
844
845	/* Done ethernet processing. Strip off the ethernet header. */
846	m_adj(m, ETHER_HDR_LEN);
847	switch (etype) {
848	case ETHERTYPE_ARP:
849		netdump_handle_arp(&m);
850		break;
851	case ETHERTYPE_IP:
852		netdump_handle_ip(&m);
853		break;
854	default:
855		NETDDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype);
856		break;
857	}
858done:
859	if (m != NULL)
860		m_freem(m);
861}
862
863/*
864 * After trapping, instead of assuming that most of the network stack is sane,
865 * we just poll the driver directly for packets.
866 */
867static void
868netdump_network_poll(void)
869{
870
871	MPASS(nd_ifp != NULL);
872
873	nd_ifp->if_netdump_methods->nd_poll(nd_ifp, 1000);
874}
875
876/*-
877 * Dumping specific primitives.
878 */
879
880/*
881 * Callback from dumpsys() to dump a chunk of memory.
882 * Copies it out to our static buffer then sends it across the network.
883 * Detects the initial KDH and makes sure it is given a special packet type.
884 *
885 * Parameters:
886 *	priv	 Unused. Optional private pointer.
887 *	virtual  Virtual address (where to read the data from)
888 *	physical Unused. Physical memory address.
889 *	offset	 Offset from start of core file
890 *	length	 Data length
891 *
892 * Return value:
893 *	0 on success
894 *	errno on error
895 */
896static int
897netdump_dumper(void *priv __unused, void *virtual,
898    vm_offset_t physical __unused, off_t offset, size_t length)
899{
900	int error;
901
902	NETDDEBUGV("netdump_dumper(NULL, %p, NULL, %ju, %zu)\n",
903	    virtual, (uintmax_t)offset, length);
904
905	if (virtual == NULL) {
906		if (dump_failed != 0)
907			printf("failed to dump the kernel core\n");
908		else if (netdump_send(NETDUMP_FINISHED, 0, NULL, 0) != 0)
909			printf("failed to close the transaction\n");
910		else
911			printf("\nnetdump finished.\n");
912		netdump_cleanup();
913		return (0);
914	}
915	if (length > sizeof(nd_buf))
916		return (ENOSPC);
917
918	memmove(nd_buf, virtual, length);
919	error = netdump_send(NETDUMP_VMCORE, offset, nd_buf, length);
920	if (error != 0) {
921		dump_failed = 1;
922		return (error);
923	}
924	return (0);
925}
926
927/*
928 * Perform any initalization needed prior to transmitting the kernel core.
929 */
930static int
931netdump_start(struct dumperinfo *di)
932{
933	char *path;
934	char buf[INET_ADDRSTRLEN];
935	uint32_t len;
936	int error;
937
938	error = 0;
939
940	/* Check if the dumping is allowed to continue. */
941	if (nd_enabled == 0)
942		return (EINVAL);
943
944	if (panicstr == NULL) {
945		printf(
946		    "netdump_start: netdump may only be used after a panic\n");
947		return (EINVAL);
948	}
949
950	MPASS(nd_ifp != NULL);
951
952	if (nd_server.s_addr == INADDR_ANY) {
953		printf("netdump_start: can't netdump; no server IP given\n");
954		return (EINVAL);
955	}
956	if (nd_client.s_addr == INADDR_ANY) {
957		printf("netdump_start: can't netdump; no client IP given\n");
958		return (EINVAL);
959	}
960
961	/* We start dumping at offset 0. */
962	di->dumpoff = 0;
963
964	nd_seqno = 1;
965
966	/*
967	 * nd_server_port could have switched after the first ack the
968	 * first time it gets called.  Adjust it accordingly.
969	 */
970	nd_server_port = NETDUMP_PORT;
971
972	/* Switch to the netdump mbuf zones. */
973	netdump_mbuf_dump();
974
975	nd_ifp->if_netdump_methods->nd_event(nd_ifp, NETDUMP_START);
976
977	/* Make the card use *our* receive callback. */
978	drv_if_input = nd_ifp->if_input;
979	nd_ifp->if_input = netdump_pkt_in;
980
981	if (nd_gateway.s_addr == INADDR_ANY) {
982		restore_gw_addr = 1;
983		nd_gateway.s_addr = nd_server.s_addr;
984	}
985
986	printf("netdump in progress. searching for server...\n");
987	if (netdump_arp_gw()) {
988		printf("failed to locate server MAC address\n");
989		error = EINVAL;
990		goto trig_abort;
991	}
992
993	if (nd_path[0] != '\0') {
994		path = nd_path;
995		len = strlen(path) + 1;
996	} else {
997		path = NULL;
998		len = 0;
999	}
1000	if (netdump_send(NETDUMP_HERALD, 0, path, len) != 0) {
1001		printf("failed to contact netdump server\n");
1002		error = EINVAL;
1003		goto trig_abort;
1004	}
1005	printf("netdumping to %s (%6D)\n", inet_ntoa_r(nd_server, buf),
1006	    nd_gw_mac.octet, ":");
1007	return (0);
1008
1009trig_abort:
1010	netdump_cleanup();
1011	return (error);
1012}
1013
1014static int
1015netdump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh,
1016    void *key, uint32_t keysize)
1017{
1018	int error;
1019
1020	memcpy(nd_buf, kdh, sizeof(*kdh));
1021	error = netdump_send(NETDUMP_KDH, 0, nd_buf, sizeof(*kdh));
1022	if (error == 0 && keysize > 0) {
1023		if (keysize > sizeof(nd_buf))
1024			return (EINVAL);
1025		memcpy(nd_buf, key, keysize);
1026		error = netdump_send(NETDUMP_EKCD_KEY, 0, nd_buf, keysize);
1027	}
1028	return (error);
1029}
1030
1031/*
1032 * Cleanup routine for a possibly failed netdump.
1033 */
1034static void
1035netdump_cleanup(void)
1036{
1037
1038	if (restore_gw_addr != 0) {
1039		nd_gateway.s_addr = INADDR_ANY;
1040		restore_gw_addr = 0;
1041	}
1042	if (drv_if_input != NULL) {
1043		nd_ifp->if_input = drv_if_input;
1044		drv_if_input = NULL;
1045	}
1046	nd_ifp->if_netdump_methods->nd_event(nd_ifp, NETDUMP_END);
1047}
1048
1049/*-
1050 * KLD specific code.
1051 */
1052
1053static struct cdevsw netdump_cdevsw = {
1054	.d_version =	D_VERSION,
1055	.d_ioctl =	netdump_ioctl,
1056	.d_name =	"netdump",
1057};
1058
1059static struct cdev *netdump_cdev;
1060
1061static int
1062netdump_configure(struct netdump_conf *conf, struct thread *td)
1063{
1064	struct ifnet *ifp;
1065
1066	CURVNET_SET(TD_TO_VNET(td));
1067	if (!IS_DEFAULT_VNET(curvnet)) {
1068		CURVNET_RESTORE();
1069		return (EINVAL);
1070	}
1071	IFNET_RLOCK_NOSLEEP();
1072	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1073		if (strcmp(ifp->if_xname, conf->ndc_iface) == 0)
1074			break;
1075	}
1076	/* XXX ref */
1077	IFNET_RUNLOCK_NOSLEEP();
1078	CURVNET_RESTORE();
1079
1080	if (ifp == NULL)
1081		return (ENOENT);
1082	if ((if_getflags(ifp) & IFF_UP) == 0)
1083		return (ENXIO);
1084	if (!netdump_supported_nic(ifp) || ifp->if_type != IFT_ETHER)
1085		return (EINVAL);
1086
1087	nd_ifp = ifp;
1088	netdump_reinit(ifp);
1089	memcpy(&nd_conf, conf, sizeof(nd_conf));
1090	nd_enabled = 1;
1091	return (0);
1092}
1093
1094/*
1095 * Reinitialize the mbuf pool used by drivers while dumping. This is called
1096 * from the generic ioctl handler for SIOCSIFMTU after the driver has
1097 * reconfigured itself.
1098 */
1099void
1100netdump_reinit(struct ifnet *ifp)
1101{
1102	int clsize, nmbuf, ncl, nrxr;
1103
1104	if (ifp != nd_ifp)
1105		return;
1106
1107	ifp->if_netdump_methods->nd_init(ifp, &nrxr, &ncl, &clsize);
1108	KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr));
1109
1110	/*
1111	 * We need two headers per message on the transmit side. Multiply by
1112	 * four to give us some breathing room.
1113	 */
1114	nmbuf = ncl * (4 + nrxr);
1115	ncl *= nrxr;
1116	netdump_mbuf_reinit(nmbuf, ncl, clsize);
1117}
1118
1119/*
1120 * ioctl(2) handler for the netdump device. This is currently only used to
1121 * register netdump as a dump device.
1122 *
1123 * Parameters:
1124 *     dev, Unused.
1125 *     cmd, The ioctl to be handled.
1126 *     addr, The parameter for the ioctl.
1127 *     flags, Unused.
1128 *     td, The thread invoking this ioctl.
1129 *
1130 * Returns:
1131 *     0 on success, and an errno value on failure.
1132 */
1133static int
1134netdump_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr,
1135    int flags __unused, struct thread *td)
1136{
1137	struct diocskerneldump_arg *kda;
1138	struct dumperinfo dumper;
1139	struct netdump_conf *conf;
1140	uint8_t *encryptedkey;
1141	int error;
1142	u_int u;
1143
1144	error = 0;
1145	switch (cmd) {
1146	case DIOCSKERNELDUMP:
1147		u = *(u_int *)addr;
1148		if (u != 0) {
1149			error = ENXIO;
1150			break;
1151		}
1152
1153		if (nd_enabled) {
1154			nd_enabled = 0;
1155			netdump_mbuf_drain();
1156		}
1157		break;
1158	case NETDUMPGCONF:
1159		conf = (struct netdump_conf *)addr;
1160		if (!nd_enabled) {
1161			error = ENXIO;
1162			break;
1163		}
1164
1165		strlcpy(conf->ndc_iface, nd_ifp->if_xname,
1166		    sizeof(conf->ndc_iface));
1167		memcpy(&conf->ndc_server, &nd_server, sizeof(nd_server));
1168		memcpy(&conf->ndc_client, &nd_client, sizeof(nd_client));
1169		memcpy(&conf->ndc_gateway, &nd_gateway, sizeof(nd_gateway));
1170		break;
1171	case NETDUMPSCONF:
1172		conf = (struct netdump_conf *)addr;
1173		encryptedkey = NULL;
1174		kda = &conf->ndc_kda;
1175
1176		conf->ndc_iface[sizeof(conf->ndc_iface) - 1] = '\0';
1177		if (kda->kda_enable == 0) {
1178			if (nd_enabled) {
1179				error = clear_dumper(td);
1180				if (error == 0) {
1181					nd_enabled = 0;
1182					netdump_mbuf_drain();
1183				}
1184			}
1185			break;
1186		}
1187
1188		error = netdump_configure(conf, td);
1189		if (error != 0)
1190			break;
1191
1192		if (kda->kda_encryption != KERNELDUMP_ENC_NONE) {
1193			if (kda->kda_encryptedkeysize <= 0 ||
1194			    kda->kda_encryptedkeysize >
1195			    KERNELDUMP_ENCKEY_MAX_SIZE)
1196				return (EINVAL);
1197			encryptedkey = malloc(kda->kda_encryptedkeysize, M_TEMP,
1198			    M_WAITOK);
1199			error = copyin(kda->kda_encryptedkey, encryptedkey,
1200			    kda->kda_encryptedkeysize);
1201			if (error != 0) {
1202				free(encryptedkey, M_TEMP);
1203				return (error);
1204			}
1205		}
1206
1207		memset(&dumper, 0, sizeof(dumper));
1208		dumper.dumper_start = netdump_start;
1209		dumper.dumper_hdr = netdump_write_headers;
1210		dumper.dumper = netdump_dumper;
1211		dumper.priv = NULL;
1212		dumper.blocksize = NETDUMP_DATASIZE;
1213		dumper.maxiosize = MAXDUMPPGS * PAGE_SIZE;
1214		dumper.mediaoffset = 0;
1215		dumper.mediasize = 0;
1216
1217		error = set_dumper(&dumper, conf->ndc_iface, td,
1218		    kda->kda_compression, kda->kda_encryption,
1219		    kda->kda_key, kda->kda_encryptedkeysize,
1220		    encryptedkey);
1221		if (encryptedkey != NULL) {
1222			explicit_bzero(encryptedkey, kda->kda_encryptedkeysize);
1223			free(encryptedkey, M_TEMP);
1224		}
1225		if (error != 0) {
1226			nd_enabled = 0;
1227			netdump_mbuf_drain();
1228		}
1229		break;
1230	default:
1231		error = EINVAL;
1232		break;
1233	}
1234	return (error);
1235}
1236
1237/*
1238 * Called upon system init or kld load.  Initializes the netdump parameters to
1239 * sane defaults (locates the first available NIC and uses the first IPv4 IP on
1240 * that card as the client IP).  Leaves the server IP unconfigured.
1241 *
1242 * Parameters:
1243 *	mod, Unused.
1244 *	what, The module event type.
1245 *	priv, Unused.
1246 *
1247 * Returns:
1248 *	int, An errno value if an error occured, 0 otherwise.
1249 */
1250static int
1251netdump_modevent(module_t mod __unused, int what, void *priv __unused)
1252{
1253	struct netdump_conf conf;
1254	char *arg;
1255	int error;
1256
1257	error = 0;
1258	switch (what) {
1259	case MOD_LOAD:
1260		error = make_dev_p(MAKEDEV_WAITOK, &netdump_cdev,
1261		    &netdump_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "netdump");
1262		if (error != 0)
1263			return (error);
1264
1265		if ((arg = kern_getenv("net.dump.iface")) != NULL) {
1266			strlcpy(conf.ndc_iface, arg, sizeof(conf.ndc_iface));
1267			freeenv(arg);
1268
1269			if ((arg = kern_getenv("net.dump.server")) != NULL) {
1270				inet_aton(arg, &conf.ndc_server);
1271				freeenv(arg);
1272			}
1273			if ((arg = kern_getenv("net.dump.client")) != NULL) {
1274				inet_aton(arg, &conf.ndc_server);
1275				freeenv(arg);
1276			}
1277			if ((arg = kern_getenv("net.dump.gateway")) != NULL) {
1278				inet_aton(arg, &conf.ndc_server);
1279				freeenv(arg);
1280			}
1281
1282			/* Ignore errors; we print a message to the console. */
1283			(void)netdump_configure(&conf, curthread);
1284		}
1285		break;
1286	case MOD_UNLOAD:
1287		destroy_dev(netdump_cdev);
1288		if (nd_enabled) {
1289			printf("netdump: disabling dump device for unload\n");
1290			(void)clear_dumper(curthread);
1291			nd_enabled = 0;
1292		}
1293		break;
1294	default:
1295		error = EOPNOTSUPP;
1296		break;
1297	}
1298	return (error);
1299}
1300
1301static moduledata_t netdump_mod = {
1302	"netdump",
1303	netdump_modevent,
1304	NULL,
1305};
1306
1307MODULE_VERSION(netdump, 1);
1308DECLARE_MODULE(netdump, netdump_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
1309