1/*	$OpenBSD: if_ethersubr.c,v 1.293 2024/02/14 22:41:48 bluhm Exp $	*/
2/*	$NetBSD: if_ethersubr.c,v 1.19 1996/05/07 02:40:30 thorpej Exp $	*/
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1989, 1993
35 *	The Regents of the University of California.  All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 *    notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 *    notice, this list of conditions and the following disclaimer in the
44 *    documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 *    may be used to endorse or promote products derived from this software
47 *    without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 *	@(#)if_ethersubr.c	8.1 (Berkeley) 6/10/93
62 */
63
64/*
65%%% portions-copyright-nrl-95
66Portions of this software are Copyright 1995-1998 by Randall Atkinson,
67Ronald Lee, Daniel McDonald, Bao Phan, and Chris Winters. All Rights
68Reserved. All rights under this copyright have been assigned to the US
69Naval Research Laboratory (NRL). The NRL Copyright Notice and License
70Agreement Version 1.1 (January 17, 1995) applies to these portions of the
71software.
72You should have received a copy of the license with this software. If you
73didn't get a copy, you may request one from <license@ipv6.nrl.navy.mil>.
74*/
75
76#include "bpfilter.h"
77
78#include <sys/param.h>
79#include <sys/systm.h>
80#include <sys/kernel.h>
81#include <sys/malloc.h>
82#include <sys/mbuf.h>
83#include <sys/socket.h>
84#include <sys/ioctl.h>
85#include <sys/errno.h>
86#include <sys/syslog.h>
87#include <sys/timeout.h>
88#include <sys/smr.h>
89
90#include <net/if.h>
91#include <net/netisr.h>
92#include <net/route.h>
93#include <net/if_llc.h>
94#include <net/if_dl.h>
95#include <net/if_media.h>
96#include <net/if_types.h>
97
98#include <netinet/in.h>
99#include <netinet/if_ether.h>
100#include <netinet/ip_ipsp.h>
101#include <netinet/ip.h>
102#include <netinet/ip6.h>
103#include <netinet/tcp.h>
104#include <netinet/udp.h>
105
106#if NBPFILTER > 0
107#include <net/bpf.h>
108#endif
109
110#include "vlan.h"
111#if NVLAN > 0
112#include <net/if_vlan_var.h>
113#endif
114
115#include "carp.h"
116#if NCARP > 0
117#include <netinet/ip_carp.h>
118#endif
119
120#include "pppoe.h"
121#if NPPPOE > 0
122#include <net/if_pppoe.h>
123#endif
124
125#include "bpe.h"
126#if NBPE > 0
127#include <net/if_bpe.h>
128#endif
129
130#ifdef INET6
131#include <netinet6/in6_var.h>
132#include <netinet6/nd6.h>
133#endif
134
135#ifdef PIPEX
136#include <net/pipex.h>
137#endif
138
139#ifdef MPLS
140#include <netmpls/mpls.h>
141#endif /* MPLS */
142
/*
 * Debug output support.
 *
 * Uncomment the define below (or define ETHERDEBUG on the compiler command
 * line) to compile the debug messages in.  The value of ETHERDEBUG becomes
 * the initial value of the etherdebug variable.
 */
/* #define ETHERDEBUG 1 */
#ifdef ETHERDEBUG
int etherdebug = ETHERDEBUG;
/*
 * Print a message, prefixed with the calling function's name and followed
 * by a newline, when etherdebug is at least `level'.
 */
#define DNPRINTF(level, fmt, args...)					\
	do {								\
		if (etherdebug >= level)				\
			printf("%s: " fmt "\n", __func__, ## args);	\
	} while (0)
#else
/* Without ETHERDEBUG the macro expands to an empty statement. */
#define DNPRINTF(level, fmt, args...)					\
	do { } while (0)
#endif
/* Shorthand for a level 1 debug message. */
#define DPRINTF(fmt, args...)	DNPRINTF(1, fmt, args)
156
/* The all-ones Ethernet address (ff:ff:ff:ff:ff:ff). */
u_int8_t etherbroadcastaddr[ETHER_ADDR_LEN] =
    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
/* The all-zeroes Ethernet address (00:00:00:00:00:00). */
u_int8_t etheranyaddr[ETHER_ADDR_LEN] =
    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
/*
 * Store `e' in the local variable `error' and jump to the `bad' label.
 * The function using this macro must provide both.
 */
#define senderr(e) { error = (e); goto bad;}
162
163int
164ether_ioctl(struct ifnet *ifp, struct arpcom *arp, u_long cmd, caddr_t data)
165{
166	struct ifreq *ifr = (struct ifreq *)data;
167	int error = 0;
168
169	switch (cmd) {
170	case SIOCSIFADDR:
171		break;
172
173	case SIOCSIFMTU:
174		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > ifp->if_hardmtu)
175			error = EINVAL;
176		else
177			ifp->if_mtu = ifr->ifr_mtu;
178		break;
179
180	case SIOCADDMULTI:
181	case SIOCDELMULTI:
182		if (ifp->if_flags & IFF_MULTICAST) {
183			error = (cmd == SIOCADDMULTI) ?
184			    ether_addmulti(ifr, arp) :
185			    ether_delmulti(ifr, arp);
186		} else
187			error = ENOTTY;
188		break;
189
190	default:
191		error = ENOTTY;
192	}
193
194	return (error);
195}
196
197
198void
199ether_rtrequest(struct ifnet *ifp, int req, struct rtentry *rt)
200{
201	if (rt == NULL)
202		return;
203
204	switch (rt_key(rt)->sa_family) {
205	case AF_INET:
206		arp_rtrequest(ifp, req, rt);
207		break;
208#ifdef INET6
209	case AF_INET6:
210		nd6_rtrequest(ifp, req, rt);
211		break;
212#endif
213	default:
214		break;
215	}
216}
217
/*
 * Fill in the Ethernet header `eh' for sending mbuf `m' to `dst' on `ifp'.
 *
 * The destination address and Ethernet type are chosen by the address
 * family of `dst':
 *   AF_INET / AF_INET6	resolved via arpresolve() / nd6_resolve()
 *   AF_MPLS		resolved via the route's gateway or key
 *   AF_UNSPEC		destination and type are copied from dst->sa_data
 *   pseudo_AF_HDRCMPLT	the complete header, including the source address,
 *			is copied from dst->sa_data
 * For every family except pseudo_AF_HDRCMPLT the source address is set to
 * the interface's own address (ac_enaddr).
 *
 * Returns 0 on success or an errno value.  Errors raised here through
 * senderr() free `m'.  Errors returned by arpresolve()/nd6_resolve() are
 * passed back without touching `m' here.
 * NOTE(review): presumably the resolver has taken over the mbuf in that
 * case - confirm against arpresolve()/nd6_resolve().
 */
int
ether_resolve(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt, struct ether_header *eh)
{
	struct arpcom *ac = (struct arpcom *)ifp;
	sa_family_t af = dst->sa_family;
	int error = 0;

	/* Nothing can be sent on an interface that is not running. */
	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		senderr(ENETDOWN);

	/*
	 * A route is required unless the packet is multicast/broadcast or
	 * the caller supplies the header itself.
	 */
	KASSERT(rt != NULL || ISSET(m->m_flags, M_MCAST|M_BCAST) ||
		af == AF_UNSPEC || af == pseudo_AF_HDRCMPLT);

#ifdef DIAGNOSTIC
	/* Only report the mismatch; the packet is still processed. */
	if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) {
		printf("%s: trying to send packet on wrong domain. "
		    "if %d vs. mbuf %d\n", ifp->if_xname,
		    ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid));
	}
#endif

	switch (af) {
	case AF_INET:
		error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
		if (error)
			return (error);
		eh->ether_type = htons(ETHERTYPE_IP);

		/*
		 * If broadcasting on a simplex interface, loopback a copy.
		 * The checksum must be calculated in software.  Keep the
		 * condition in sync with in_ifcap_cksum().
		 */
		if (ISSET(m->m_flags, M_BCAST) &&
		    ISSET(ifp->if_flags, IFF_SIMPLEX) &&
		    !m->m_pkthdr.pf.routed) {
			struct mbuf *mcopy;

			/* XXX Should we input an unencrypted IPsec packet? */
			mcopy = m_copym(m, 0, M_COPYALL, M_NOWAIT);
			/* A failed copy silently skips the loopback. */
			if (mcopy != NULL)
				if_input_local(ifp, mcopy, af);
		}
		break;
#ifdef INET6
	case AF_INET6:
		error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
		if (error)
			return (error);
		eh->ether_type = htons(ETHERTYPE_IPV6);
		break;
#endif
#ifdef MPLS
	case AF_MPLS:
		if (rt == NULL)
			senderr(EHOSTUNREACH);

		if (!ISSET(ifp->if_xflags, IFXF_MPLS))
			senderr(ENETUNREACH);

		/* Resolve the next hop: the gateway if set, else the key. */
		dst = ISSET(rt->rt_flags, RTF_GATEWAY) ?
		    rt->rt_gateway : rt_key(rt);

		switch (dst->sa_family) {
		case AF_LINK:
			/* The link-level address must hold a full MAC. */
			if (satosdl(dst)->sdl_alen < sizeof(eh->ether_dhost))
				senderr(EHOSTUNREACH);
			memcpy(eh->ether_dhost, LLADDR(satosdl(dst)),
			    sizeof(eh->ether_dhost));
			break;
#ifdef INET6
		case AF_INET6:
			error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost);
			if (error)
				return (error);
			break;
#endif
		case AF_INET:
			error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
			if (error)
				return (error);
			break;
		default:
			senderr(EHOSTUNREACH);
		}
		/* XXX handling for simplex devices in case of M/BCAST ?? */
		if (m->m_flags & (M_BCAST | M_MCAST))
			eh->ether_type = htons(ETHERTYPE_MPLS_MCAST);
		else
			eh->ether_type = htons(ETHERTYPE_MPLS);
		break;
#endif /* MPLS */
	case pseudo_AF_HDRCMPLT:
		/* take the whole header from the sa */
		memcpy(eh, dst->sa_data, sizeof(*eh));
		return (0);

	case AF_UNSPEC:
		/* take the dst and type from the sa, but get src below */
		memcpy(eh, dst->sa_data, sizeof(*eh));
		break;

	default:
		printf("%s: can't handle af%d\n", ifp->if_xname, af);
		senderr(EAFNOSUPPORT);
	}

	/* The frame is sent from the interface's own address. */
	memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost));

	return (0);

bad:
	/* Target of senderr(): drop the packet and report the error. */
	m_freem(m);
	return (error);
}
334
335struct mbuf*
336ether_encap(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
337    struct rtentry *rt, int *errorp)
338{
339	struct ether_header eh;
340	int error;
341
342	error = ether_resolve(ifp, m, dst, rt, &eh);
343	switch (error) {
344	case 0:
345		break;
346	case EAGAIN:
347		error = 0;
348	default:
349		*errorp = error;
350		return (NULL);
351	}
352
353	m = m_prepend(m, ETHER_ALIGN + sizeof(eh), M_DONTWAIT);
354	if (m == NULL) {
355		*errorp = ENOBUFS;
356		return (NULL);
357	}
358
359	m_adj(m, ETHER_ALIGN);
360	memcpy(mtod(m, struct ether_header *), &eh, sizeof(eh));
361
362	return (m);
363}
364
365int
366ether_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
367    struct rtentry *rt)
368{
369	int error;
370
371	m = ether_encap(ifp, m, dst, rt, &error);
372	if (m == NULL)
373		return (error);
374
375	return (if_enqueue(ifp, m));
376}
377
/*
 * Process a received Ethernet packet.
 *
 * Ethernet input has several "phases" of filtering packets to
 * support virtual/pseudo interfaces before actual layer 3 protocol
 * handling.
 *
 * First phase:
 *
 * The first phase supports drivers that aggregate multiple Ethernet
 * ports into a single logical interface, ie, aggr(4) and trunk(4).
 * These drivers intercept packets by swapping out the if_input handler
 * on the "port" interfaces to steal the packets before they get here
 * to ether_input().
 *
 * The mbuf is always consumed: it is either handed to another layer
 * or freed at the dropanyway label.
 */
void
ether_input(struct ifnet *ifp, struct mbuf *m)
{
	struct ether_header *eh;
	void (*input)(struct ifnet *, struct mbuf *);
	u_int16_t etype;
	struct arpcom *ac;
	const struct ether_brport *eb;
	unsigned int sdelim = 0;
	uint64_t dst, self;

	/* Drop short frames */
	if (m->m_len < ETHER_HDR_LEN)
		goto dropanyway;

	/*
	 * Second phase: service delimited packet filtering.
	 *
	 * Let vlan(4) and svlan(4) look at "service delimited"
	 * packets. If a virtual interface does not exist to take
	 * those packets, they're returned to ether_input() so a
	 * bridge can have a go at forwarding them.
	 */

	eh = mtod(m, struct ether_header *);
	/* Keep the destination as a 64-bit integer for cheap compares. */
	dst = ether_addr_to_e64((struct ether_addr *)eh->ether_dhost);
	etype = ntohs(eh->ether_type);

	if (ISSET(m->m_flags, M_VLANTAG) ||
	    etype == ETHERTYPE_VLAN || etype == ETHERTYPE_QINQ) {
#if NVLAN > 0
		/* A NULL return means vlan_input() kept the packet. */
		m = vlan_input(ifp, m, &sdelim);
		if (m == NULL)
			return;
#else
		/* Without vlan(4) a tagged packet is never for this port. */
		sdelim = 1;
#endif
	}

	/*
	 * Third phase: bridge processing.
	 *
	 * Give the packet to a bridge interface, ie, bridge(4),
	 * veb(4), or tpmr(4), if it is configured. A bridge
	 * may take the packet and forward it to another port, or it
	 * may return it here to ether_input() to support local
	 * delivery to this port.
	 */

	ac = (struct arpcom *)ifp;

	/*
	 * The bridge port pointer is read inside an SMR read section.
	 * eb_port_take() is called before leaving the section and
	 * eb_port_rele() after the input handler has run.
	 * NOTE(review): presumably take/rele hold a reference that keeps
	 * the port usable outside the read section - confirm.
	 */
	smr_read_enter();
	eb = SMR_PTR_GET(&ac->ac_brport);
	if (eb != NULL)
		eb->eb_port_take(eb->eb_port);
	smr_read_leave();
	if (eb != NULL) {
		m = (*eb->eb_input)(ifp, m, dst, eb->eb_port);
		eb->eb_port_rele(eb->eb_port);
		/* NULL: the bridge kept the packet. */
		if (m == NULL) {
			return;
		}
	}

	/*
	 * Fourth phase: drop service delimited packets.
	 *
	 * If the packet has a tag, and a bridge didn't want it,
	 * it's not for this port.
	 */

	if (sdelim)
		goto dropanyway;

	/*
	 * Fifth phase: destination address check.
	 *
	 * Is the packet specifically addressed to this port?
	 */

	/* The earlier phases may have handed back a different mbuf. */
	eh = mtod(m, struct ether_header *);
	self = ether_addr_to_e64((struct ether_addr *)ac->ac_enaddr);
	if (dst != self) {
#if NCARP > 0
		/*
		 * If it's not for this port, it could be for carp(4).
		 */
		if (ifp->if_type != IFT_CARP &&
		    !SRPL_EMPTY_LOCKED(&ifp->if_carp)) {
			/* A NULL return means carp_input() kept the packet. */
			m = carp_input(ifp, m, dst);
			if (m == NULL)
				return;

			eh = mtod(m, struct ether_header *);
		}
#endif

		/*
		 * If not, it must be multicast or broadcast to go further.
		 */
		if (!ETH64_IS_MULTICAST(dst))
			goto dropanyway;

		/*
		 * If this is not a simplex interface, drop the packet
		 * if it came from us.
		 */
		if ((ifp->if_flags & IFF_SIMPLEX) == 0) {
			uint64_t src = ether_addr_to_e64(
			    (struct ether_addr *)eh->ether_shost);
			if (self == src)
				goto dropanyway;
		}

		/* Mark the mbuf so upper layers know how it was addressed. */
		SET(m->m_flags, ETH64_IS_BROADCAST(dst) ? M_BCAST : M_MCAST);
		ifp->if_imcasts++;
	}

	/*
	 * Sixth phase: protocol demux.
	 *
	 * At this point it is known that the packet is destined
	 * for layer 3 protocol handling on the local port.
	 */
	etype = ntohs(eh->ether_type);

	switch (etype) {
	case ETHERTYPE_IP:
		input = ipv4_input;
		break;

	case ETHERTYPE_ARP:
		if (ifp->if_flags & IFF_NOARP)
			goto dropanyway;
		input = arpinput;
		break;

	case ETHERTYPE_REVARP:
		if (ifp->if_flags & IFF_NOARP)
			goto dropanyway;
		input = revarpinput;
		break;

#ifdef INET6
	/*
	 * Incoming IPv6 packets are handed to ipv6_input().
	 */
	case ETHERTYPE_IPV6:
		input = ipv6_input;
		break;
#endif /* INET6 */
#if NPPPOE > 0 || defined(PIPEX)
	case ETHERTYPE_PPPOEDISC:
	case ETHERTYPE_PPPOE:
		/* PPPoE frames are only accepted when unicast. */
		if (m->m_flags & (M_MCAST | M_BCAST))
			goto dropanyway;
#ifdef PIPEX
		/* Let pipex take the packet if it has a matching session. */
		if (pipex_enable) {
			struct pipex_session *session;

			if ((session = pipex_pppoe_lookup_session(m)) != NULL) {
				pipex_pppoe_input(m, session);
				pipex_rele_session(session);
				return;
			}
		}
#endif
		/*
		 * Queue the frame (Ethernet header still attached) on the
		 * discovery or session queue and schedule the PPPoE netisr
		 * when mq_enqueue() returned 0.
		 */
		if (etype == ETHERTYPE_PPPOEDISC) {
			if (mq_enqueue(&pppoediscinq, m) == 0)
				schednetisr(NETISR_PPPOE);
		} else {
			if (mq_enqueue(&pppoeinq, m) == 0)
				schednetisr(NETISR_PPPOE);
		}
		return;
#endif
#ifdef MPLS
	case ETHERTYPE_MPLS:
	case ETHERTYPE_MPLS_MCAST:
		input = mpls_input;
		break;
#endif
#if NBPE > 0
	case ETHERTYPE_PBB:
		/* bpe(4) gets the frame with the Ethernet header attached. */
		bpe_input(ifp, m);
		return;
#endif
	default:
		goto dropanyway;
	}

	/* Strip the Ethernet header before calling the protocol input. */
	m_adj(m, sizeof(*eh));
	(*input)(ifp, m);
	return;
dropanyway:
	m_freem(m);
	return;
}
591
592int
593ether_brport_isset(struct ifnet *ifp)
594{
595	struct arpcom *ac = (struct arpcom *)ifp;
596
597	KERNEL_ASSERT_LOCKED();
598	if (SMR_PTR_GET_LOCKED(&ac->ac_brport) != NULL)
599		return (EBUSY);
600
601	return (0);
602}
603
604void
605ether_brport_set(struct ifnet *ifp, const struct ether_brport *eb)
606{
607	struct arpcom *ac = (struct arpcom *)ifp;
608
609	KERNEL_ASSERT_LOCKED();
610	KASSERTMSG(SMR_PTR_GET_LOCKED(&ac->ac_brport) == NULL,
611	    "%s setting an already set brport", ifp->if_xname);
612
613	SMR_PTR_SET_LOCKED(&ac->ac_brport, eb);
614}
615
616void
617ether_brport_clr(struct ifnet *ifp)
618{
619	struct arpcom *ac = (struct arpcom *)ifp;
620
621	KERNEL_ASSERT_LOCKED();
622	KASSERTMSG(SMR_PTR_GET_LOCKED(&ac->ac_brport) != NULL,
623	    "%s clearing an already clear brport", ifp->if_xname);
624
625	SMR_PTR_SET_LOCKED(&ac->ac_brport, NULL);
626}
627
628const struct ether_brport *
629ether_brport_get(struct ifnet *ifp)
630{
631	struct arpcom *ac = (struct arpcom *)ifp;
632	SMR_ASSERT_CRITICAL();
633	return (SMR_PTR_GET(&ac->ac_brport));
634}
635
636const struct ether_brport *
637ether_brport_get_locked(struct ifnet *ifp)
638{
639	struct arpcom *ac = (struct arpcom *)ifp;
640	KERNEL_ASSERT_LOCKED();
641	return (SMR_PTR_GET_LOCKED(&ac->ac_brport));
642}
643
644/*
645 * Convert Ethernet address to printable (loggable) representation.
646 */
647static char digits[] = "0123456789abcdef";
648char *
649ether_sprintf(u_char *ap)
650{
651	int i;
652	static char etherbuf[ETHER_ADDR_LEN * 3];
653	char *cp = etherbuf;
654
655	for (i = 0; i < ETHER_ADDR_LEN; i++) {
656		*cp++ = digits[*ap >> 4];
657		*cp++ = digits[*ap++ & 0xf];
658		*cp++ = ':';
659	}
660	*--cp = 0;
661	return (etherbuf);
662}
663
664/*
665 * Generate a (hopefully) acceptable MAC address, if asked.
666 */
667void
668ether_fakeaddr(struct ifnet *ifp)
669{
670	static int unit;
671	int rng = arc4random();
672
673	/* Non-multicast; locally administered address */
674	((struct arpcom *)ifp)->ac_enaddr[0] = 0xfe;
675	((struct arpcom *)ifp)->ac_enaddr[1] = 0xe1;
676	((struct arpcom *)ifp)->ac_enaddr[2] = 0xba;
677	((struct arpcom *)ifp)->ac_enaddr[3] = 0xd0 | (unit++ & 0xf);
678	((struct arpcom *)ifp)->ac_enaddr[4] = rng;
679	((struct arpcom *)ifp)->ac_enaddr[5] = rng >> 8;
680}
681
682/*
683 * Perform common duties while attaching to interface list
684 */
685void
686ether_ifattach(struct ifnet *ifp)
687{
688	struct arpcom *ac = (struct arpcom *)ifp;
689
690	/*
691	 * Any interface which provides a MAC address which is obviously
692	 * invalid gets whacked, so that users will notice.
693	 */
694	if (ETHER_IS_MULTICAST(((struct arpcom *)ifp)->ac_enaddr))
695		ether_fakeaddr(ifp);
696
697	ifp->if_type = IFT_ETHER;
698	ifp->if_addrlen = ETHER_ADDR_LEN;
699	ifp->if_hdrlen = ETHER_HDR_LEN;
700	ifp->if_mtu = ETHERMTU;
701	ifp->if_input = ether_input;
702	if (ifp->if_output == NULL)
703		ifp->if_output = ether_output;
704	ifp->if_rtrequest = ether_rtrequest;
705
706	if (ifp->if_hardmtu == 0)
707		ifp->if_hardmtu = ETHERMTU;
708
709	if_alloc_sadl(ifp);
710	memcpy(LLADDR(ifp->if_sadl), ac->ac_enaddr, ifp->if_addrlen);
711	LIST_INIT(&ac->ac_multiaddrs);
712#if NBPFILTER > 0
713	bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN);
714#endif
715}
716
717void
718ether_ifdetach(struct ifnet *ifp)
719{
720	struct arpcom *ac = (struct arpcom *)ifp;
721	struct ether_multi *enm;
722
723	/* Undo pseudo-driver changes. */
724	if_deactivate(ifp);
725
726	while (!LIST_EMPTY(&ac->ac_multiaddrs)) {
727		enm = LIST_FIRST(&ac->ac_multiaddrs);
728		LIST_REMOVE(enm, enm_list);
729		free(enm, M_IFMADDR, sizeof *enm);
730	}
731}
732
733#if 0
734/*
735 * This is for reference.  We have table-driven versions of the
736 * crc32 generators, which are faster than the double-loop.
737 */
738u_int32_t __pure
739ether_crc32_le_update(u_int_32_t crc, const u_int8_t *buf, size_t len)
740{
741	u_int32_t c, carry;
742	size_t i, j;
743
744	for (i = 0; i < len; i++) {
745		c = buf[i];
746		for (j = 0; j < 8; j++) {
747			carry = ((crc & 0x01) ? 1 : 0) ^ (c & 0x01);
748			crc >>= 1;
749			c >>= 1;
750			if (carry)
751				crc = (crc ^ ETHER_CRC_POLY_LE);
752		}
753	}
754
755	return (crc);
756}
757
758u_int32_t __pure
759ether_crc32_be_update(u_int_32_t crc, const u_int8_t *buf, size_t len)
760{
761	u_int32_t c, carry;
762	size_t i, j;
763
764	for (i = 0; i < len; i++) {
765		c = buf[i];
766		for (j = 0; j < 8; j++) {
767			carry = ((crc & 0x80000000U) ? 1 : 0) ^ (c & 0x01);
768			crc <<= 1;
769			c >>= 1;
770			if (carry)
771				crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
772		}
773	}
774
775	return (crc);
776}
777#else
/*
 * Table-driven little-endian CRC32 update.
 *
 * Feeds `len' bytes from `buf' into the running value `crc' and returns
 * the new running value.  Each byte is processed as two 4-bit steps, low
 * nibble first, using a 16 entry lookup table.  No initial value or final
 * inversion is applied here.
 */
u_int32_t __pure
ether_crc32_le_update(u_int32_t crc, const u_int8_t *buf, size_t len)
{
	static const u_int32_t crctab[] = {
		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
	};

	while (len-- > 0) {
		crc ^= *buf++;
		/* One table step per nibble of the byte just mixed in. */
		crc = crctab[crc & 0xf] ^ (crc >> 4);
		crc = crctab[crc & 0xf] ^ (crc >> 4);
	}

	return (crc);
}
797
/*
 * Table-driven big-endian CRC32 update.
 *
 * Feeds `len' bytes from `buf' into the running value `crc' and returns
 * the new running value.  Each byte is processed as two 4-bit steps, low
 * nibble first.  The nibbles are bit-reversed through `rev' before they
 * are combined with the top four bits of the CRC register.  No initial
 * value or final inversion is applied here.
 */
u_int32_t __pure
ether_crc32_be_update(u_int32_t crc, const u_int8_t *buf, size_t len)
{
	static const u_int8_t rev[] = {
		0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
		0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
	};
	static const u_int32_t crctab[] = {
		0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9,
		0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005,
		0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61,
		0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd
	};

	while (len-- > 0) {
		const u_int8_t octet = *buf++;
		const u_int8_t lo = rev[octet & 0xf];
		const u_int8_t hi = rev[octet >> 4];

		crc = crctab[(crc >> 28) ^ lo] ^ (crc << 4);
		crc = crctab[(crc >> 28) ^ hi] ^ (crc << 4);
	}

	return (crc);
}
822#endif
823
/*
 * Little-endian CRC32 of `len' bytes at `buf', starting from the all-ones
 * initial value.  The result is returned as computed, without a final
 * inversion.
 */
u_int32_t
ether_crc32_le(const u_int8_t *buf, size_t len)
{
	const u_int32_t seed = 0xffffffff;

	return (ether_crc32_le_update(seed, buf, len));
}
829
/*
 * Big-endian CRC32 of `len' bytes at `buf', starting from the all-ones
 * initial value.  The result is returned as computed, without a final
 * inversion.
 */
u_int32_t
ether_crc32_be(const u_int8_t *buf, size_t len)
{
	const u_int32_t seed = 0xffffffff;

	return (ether_crc32_be_update(seed, buf, len));
}
835
/*
 * Lowest and highest Ethernet multicast address used for IP multicast
 * (01:00:5e:00:00:00 - 01:00:5e:7f:ff:ff).  ether_multiaddr() hands out
 * this range for INADDR_ANY.
 */
u_char	ether_ipmulticast_min[ETHER_ADDR_LEN] =
    { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 };
u_char	ether_ipmulticast_max[ETHER_ADDR_LEN] =
    { 0x01, 0x00, 0x5e, 0x7f, 0xff, 0xff };

#ifdef INET6
/*
 * Lowest and highest Ethernet multicast address used for IPv6 multicast
 * (33:33:00:00:00:00 - 33:33:ff:ff:ff:ff).  ether_multiaddr() hands out
 * this range for the unspecified IPv6 address.
 */
u_char	ether_ip6multicast_min[ETHER_ADDR_LEN] =
    { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 };
u_char	ether_ip6multicast_max[ETHER_ADDR_LEN] =
    { 0x33, 0x33, 0xff, 0xff, 0xff, 0xff };
#endif
847
848/*
849 * Convert a sockaddr into an Ethernet address or range of Ethernet
850 * addresses.
851 */
852int
853ether_multiaddr(struct sockaddr *sa, u_int8_t addrlo[ETHER_ADDR_LEN],
854    u_int8_t addrhi[ETHER_ADDR_LEN])
855{
856	struct sockaddr_in *sin;
857#ifdef INET6
858	struct sockaddr_in6 *sin6;
859#endif /* INET6 */
860
861	switch (sa->sa_family) {
862
863	case AF_UNSPEC:
864		memcpy(addrlo, sa->sa_data, ETHER_ADDR_LEN);
865		memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
866		break;
867
868	case AF_INET:
869		sin = satosin(sa);
870		if (sin->sin_addr.s_addr == INADDR_ANY) {
871			/*
872			 * An IP address of INADDR_ANY means listen to
873			 * or stop listening to all of the Ethernet
874			 * multicast addresses used for IP.
875			 * (This is for the sake of IP multicast routers.)
876			 */
877			memcpy(addrlo, ether_ipmulticast_min, ETHER_ADDR_LEN);
878			memcpy(addrhi, ether_ipmulticast_max, ETHER_ADDR_LEN);
879		} else {
880			ETHER_MAP_IP_MULTICAST(&sin->sin_addr, addrlo);
881			memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
882		}
883		break;
884#ifdef INET6
885	case AF_INET6:
886		sin6 = satosin6(sa);
887		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
888			/*
889			 * An IP6 address of 0 means listen to or stop
890			 * listening to all of the Ethernet multicast
891			 * address used for IP6.
892			 *
893			 * (This might not be healthy, given IPv6's reliance on
894			 * multicast for things like neighbor discovery.
895			 * Perhaps initializing all-nodes, solicited nodes, and
896			 * possibly all-routers for this interface afterwards
897			 * is not a bad idea.)
898			 */
899
900			memcpy(addrlo, ether_ip6multicast_min, ETHER_ADDR_LEN);
901			memcpy(addrhi, ether_ip6multicast_max, ETHER_ADDR_LEN);
902		} else {
903			ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, addrlo);
904			memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
905		}
906		break;
907#endif
908
909	default:
910		return (EAFNOSUPPORT);
911	}
912	return (0);
913}
914
915/*
916 * Add an Ethernet multicast address or range of addresses to the list for a
917 * given interface.
918 */
919int
920ether_addmulti(struct ifreq *ifr, struct arpcom *ac)
921{
922	struct ether_multi *enm;
923	u_char addrlo[ETHER_ADDR_LEN];
924	u_char addrhi[ETHER_ADDR_LEN];
925	int s = splnet(), error;
926
927	error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
928	if (error != 0) {
929		splx(s);
930		return (error);
931	}
932
933	/*
934	 * Verify that we have valid Ethernet multicast addresses.
935	 */
936	if ((addrlo[0] & 0x01) != 1 || (addrhi[0] & 0x01) != 1) {
937		splx(s);
938		return (EINVAL);
939	}
940	/*
941	 * See if the address range is already in the list.
942	 */
943	ETHER_LOOKUP_MULTI(addrlo, addrhi, ac, enm);
944	if (enm != NULL) {
945		/*
946		 * Found it; just increment the reference count.
947		 */
948		refcnt_take(&enm->enm_refcnt);
949		splx(s);
950		return (0);
951	}
952	/*
953	 * New address or range; malloc a new multicast record
954	 * and link it into the interface's multicast list.
955	 */
956	enm = malloc(sizeof(*enm), M_IFMADDR, M_NOWAIT);
957	if (enm == NULL) {
958		splx(s);
959		return (ENOBUFS);
960	}
961	memcpy(enm->enm_addrlo, addrlo, ETHER_ADDR_LEN);
962	memcpy(enm->enm_addrhi, addrhi, ETHER_ADDR_LEN);
963	refcnt_init_trace(&enm->enm_refcnt, DT_REFCNT_IDX_ETHMULTI);
964	LIST_INSERT_HEAD(&ac->ac_multiaddrs, enm, enm_list);
965	ac->ac_multicnt++;
966	if (memcmp(addrlo, addrhi, ETHER_ADDR_LEN) != 0)
967		ac->ac_multirangecnt++;
968	splx(s);
969	/*
970	 * Return ENETRESET to inform the driver that the list has changed
971	 * and its reception filter should be adjusted accordingly.
972	 */
973	return (ENETRESET);
974}
975
976/*
977 * Delete a multicast address record.
978 */
979int
980ether_delmulti(struct ifreq *ifr, struct arpcom *ac)
981{
982	struct ether_multi *enm;
983	u_char addrlo[ETHER_ADDR_LEN];
984	u_char addrhi[ETHER_ADDR_LEN];
985	int s = splnet(), error;
986
987	error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
988	if (error != 0) {
989		splx(s);
990		return (error);
991	}
992
993	/*
994	 * Look up the address in our list.
995	 */
996	ETHER_LOOKUP_MULTI(addrlo, addrhi, ac, enm);
997	if (enm == NULL) {
998		splx(s);
999		return (ENXIO);
1000	}
1001	if (refcnt_rele(&enm->enm_refcnt) == 0) {
1002		/*
1003		 * Still some claims to this record.
1004		 */
1005		splx(s);
1006		return (0);
1007	}
1008	/*
1009	 * No remaining claims to this record; unlink and free it.
1010	 */
1011	LIST_REMOVE(enm, enm_list);
1012	free(enm, M_IFMADDR, sizeof *enm);
1013	ac->ac_multicnt--;
1014	if (memcmp(addrlo, addrhi, ETHER_ADDR_LEN) != 0)
1015		ac->ac_multirangecnt--;
1016	splx(s);
1017	/*
1018	 * Return ENETRESET to inform the driver that the list has changed
1019	 * and its reception filter should be adjusted accordingly.
1020	 */
1021	return (ENETRESET);
1022}
1023
1024uint64_t
1025ether_addr_to_e64(const struct ether_addr *ea)
1026{
1027	uint64_t e64 = 0;
1028	size_t i;
1029
1030	for (i = 0; i < nitems(ea->ether_addr_octet); i++) {
1031		e64 <<= 8;
1032		e64 |= ea->ether_addr_octet[i];
1033	}
1034
1035	return (e64);
1036}
1037
1038void
1039ether_e64_to_addr(struct ether_addr *ea, uint64_t e64)
1040{
1041	size_t i = nitems(ea->ether_addr_octet);
1042
1043	do {
1044		ea->ether_addr_octet[--i] = e64;
1045		e64 >>= 8;
1046	} while (i > 0);
1047}
1048
1049/* Parse different TCP/IP protocol headers for a quick view inside an mbuf. */
1050void
1051ether_extract_headers(struct mbuf *m0, struct ether_extracted *ext)
1052{
1053	struct mbuf	*m;
1054	size_t		 hlen, iplen;
1055	int		 hoff;
1056	uint8_t		 ipproto;
1057	uint16_t	 ether_type;
1058	/* gcc 4.2.1 on sparc64 may create 32 bit loads on unaligned mbuf */
1059	union {
1060		u_char	hc_data;
1061#if _BYTE_ORDER == _LITTLE_ENDIAN
1062		struct {
1063			u_int	hl:4,	/* header length */
1064				v:4;	/* version */
1065		} hc_ip;
1066		struct {
1067			u_int	x2:4,	/* (unused) */
1068				off:4;	/* data offset */
1069		} hc_th;
1070#endif
1071#if _BYTE_ORDER == _BIG_ENDIAN
1072		struct {
1073			u_int	v:4,	/* version */
1074				hl:4;	/* header length */
1075		} hc_ip;
1076		struct {
1077			u_int	off:4,	/* data offset */
1078				x2:4;	/* (unused) */
1079		} hc_th;
1080#endif
1081	} hdrcpy;
1082
1083	/* Return NULL if header was not recognized. */
1084	memset(ext, 0, sizeof(*ext));
1085
1086	KASSERT(ISSET(m0->m_flags, M_PKTHDR));
1087	ext->paylen = m0->m_pkthdr.len;
1088
1089	if (m0->m_len < sizeof(*ext->eh)) {
1090		DPRINTF("m_len %d, eh %zu", m0->m_len, sizeof(*ext->eh));
1091		return;
1092	}
1093	ext->eh = mtod(m0, struct ether_header *);
1094	ether_type = ntohs(ext->eh->ether_type);
1095	hlen = sizeof(*ext->eh);
1096	if (ext->paylen < hlen) {
1097		DPRINTF("paylen %u, ehlen %zu", ext->paylen, hlen);
1098		ext->eh = NULL;
1099		return;
1100	}
1101	ext->paylen -= hlen;
1102
1103#if NVLAN > 0
1104	if (ether_type == ETHERTYPE_VLAN) {
1105		if (m0->m_len < sizeof(*ext->evh)) {
1106			DPRINTF("m_len %d, evh %zu",
1107			    m0->m_len, sizeof(*ext->evh));
1108			return;
1109		}
1110		ext->evh = mtod(m0, struct ether_vlan_header *);
1111		ether_type = ntohs(ext->evh->evl_proto);
1112		hlen = sizeof(*ext->evh);
1113		if (sizeof(*ext->eh) + ext->paylen < hlen) {
1114			DPRINTF("paylen %zu, evhlen %zu",
1115			    sizeof(*ext->eh) + ext->paylen, hlen);
1116			ext->evh = NULL;
1117			return;
1118		}
1119		ext->paylen = sizeof(*ext->eh) + ext->paylen - hlen;
1120	}
1121#endif
1122
1123	switch (ether_type) {
1124	case ETHERTYPE_IP:
1125		m = m_getptr(m0, hlen, &hoff);
1126		if (m == NULL || m->m_len - hoff < sizeof(*ext->ip4)) {
1127			DPRINTF("m_len %d, hoff %d, ip4 %zu",
1128			    m ? m->m_len : -1, hoff, sizeof(*ext->ip4));
1129			return;
1130		}
1131		ext->ip4 = (struct ip *)(mtod(m, caddr_t) + hoff);
1132
1133		memcpy(&hdrcpy.hc_data, ext->ip4, 1);
1134		hlen = hdrcpy.hc_ip.hl << 2;
1135		if (m->m_len - hoff < hlen) {
1136			DPRINTF("m_len %d, hoff %d, iphl %zu",
1137			    m ? m->m_len : -1, hoff, hlen);
1138			ext->ip4 = NULL;
1139			return;
1140		}
1141		if (ext->paylen < hlen) {
1142			DPRINTF("paylen %u, ip4hlen %zu", ext->paylen, hlen);
1143			ext->ip4 = NULL;
1144			return;
1145		}
1146		iplen = ntohs(ext->ip4->ip_len);
1147		if (ext->paylen < iplen) {
1148			DPRINTF("paylen %u, ip4len %zu", ext->paylen, iplen);
1149			ext->ip4 = NULL;
1150			return;
1151		}
1152		if (iplen < hlen) {
1153			DPRINTF("ip4len %zu, ip4hlen %zu", iplen, hlen);
1154			ext->ip4 = NULL;
1155			return;
1156		}
1157		ext->iplen = iplen;
1158		ext->iphlen = hlen;
1159		ext->paylen -= hlen;
1160		ipproto = ext->ip4->ip_p;
1161
1162		if (ISSET(ntohs(ext->ip4->ip_off), IP_MF|IP_OFFMASK))
1163			return;
1164		break;
1165#ifdef INET6
1166	case ETHERTYPE_IPV6:
1167		m = m_getptr(m0, hlen, &hoff);
1168		if (m == NULL || m->m_len - hoff < sizeof(*ext->ip6)) {
1169			DPRINTF("m_len %d, hoff %d, ip6 %zu",
1170			    m ? m->m_len : -1, hoff, sizeof(*ext->ip6));
1171			return;
1172		}
1173		ext->ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + hoff);
1174
1175		hlen = sizeof(*ext->ip6);
1176		if (ext->paylen < hlen) {
1177			DPRINTF("paylen %u, ip6hlen %zu", ext->paylen, hlen);
1178			ext->ip6 = NULL;
1179			return;
1180		}
1181		iplen = hlen + ntohs(ext->ip6->ip6_plen);
1182		if (ext->paylen < iplen) {
1183			DPRINTF("paylen %u, ip6len %zu", ext->paylen, iplen);
1184			ext->ip6 = NULL;
1185			return;
1186		}
1187		ext->iplen = iplen;
1188		ext->iphlen = hlen;
1189		ext->paylen -= hlen;
1190		ipproto = ext->ip6->ip6_nxt;
1191		break;
1192#endif
1193	default:
1194		return;
1195	}
1196
1197	switch (ipproto) {
1198	case IPPROTO_TCP:
1199		m = m_getptr(m, hoff + hlen, &hoff);
1200		if (m == NULL || m->m_len - hoff < sizeof(*ext->tcp)) {
1201			DPRINTF("m_len %d, hoff %d, tcp %zu",
1202			    m ? m->m_len : -1, hoff, sizeof(*ext->tcp));
1203			return;
1204		}
1205		ext->tcp = (struct tcphdr *)(mtod(m, caddr_t) + hoff);
1206
1207		memcpy(&hdrcpy.hc_data, &ext->tcp->th_flags - 1, 1);
1208		hlen = hdrcpy.hc_th.off << 2;
1209		if (m->m_len - hoff < hlen) {
1210			DPRINTF("m_len %d, hoff %d, thoff %zu",
1211			    m ? m->m_len : -1, hoff, hlen);
1212			ext->tcp = NULL;
1213			return;
1214		}
1215		if (ext->iplen - ext->iphlen < hlen) {
1216			DPRINTF("iplen %u, iphlen %u, tcphlen %zu",
1217			    ext->iplen, ext->iphlen, hlen);
1218			ext->tcp = NULL;
1219			return;
1220		}
1221		ext->tcphlen = hlen;
1222		ext->paylen -= hlen;
1223		break;
1224
1225	case IPPROTO_UDP:
1226		m = m_getptr(m, hoff + hlen, &hoff);
1227		if (m == NULL || m->m_len - hoff < sizeof(*ext->udp)) {
1228			DPRINTF("m_len %d, hoff %d, tcp %zu",
1229			    m ? m->m_len : -1, hoff, sizeof(*ext->tcp));
1230			return;
1231		}
1232		ext->udp = (struct udphdr *)(mtod(m, caddr_t) + hoff);
1233
1234		hlen = sizeof(*ext->udp);
1235		if (ext->iplen - ext->iphlen < hlen) {
1236			DPRINTF("iplen %u, iphlen %u, udphlen %zu",
1237			    ext->iplen, ext->iphlen, hlen);
1238			ext->udp = NULL;
1239			return;
1240		}
1241		break;
1242	}
1243
1244	DNPRINTF(2, "%s%s%s%s%s%s ip %u, iph %u, tcph %u, payl %u",
1245	    ext->eh ? "eh," : "", ext->evh ? "evh," : "",
1246	    ext->ip4 ? "ip4," : "", ext->ip6 ? "ip6," : "",
1247	    ext->tcp ? "tcp," : "", ext->udp ? "udp," : "",
1248	    ext->iplen, ext->iphlen, ext->tcphlen, ext->paylen);
1249}
1250