if_ethersubr.c revision 1.314
1/*	$NetBSD: if_ethersubr.c,v 1.314 2022/06/20 08:20:09 yamaguchi Exp $	*/
2
3/*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32/*
33 * Copyright (c) 1982, 1989, 1993
34 *	The Regents of the University of California.  All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 *    notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 *    notice, this list of conditions and the following disclaimer in the
43 *    documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 *    may be used to endorse or promote products derived from this software
46 *    without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 *	@(#)if_ethersubr.c	8.2 (Berkeley) 4/4/96
61 */
62
63#include <sys/cdefs.h>
64__KERNEL_RCSID(0, "$NetBSD: if_ethersubr.c,v 1.314 2022/06/20 08:20:09 yamaguchi Exp $");
65
66#ifdef _KERNEL_OPT
67#include "opt_inet.h"
68#include "opt_atalk.h"
69#include "opt_mbuftrace.h"
70#include "opt_mpls.h"
71#include "opt_gateway.h"
72#include "opt_pppoe.h"
73#include "opt_net_mpsafe.h"
74#endif
75
76#include "vlan.h"
77#include "pppoe.h"
78#include "bridge.h"
79#include "arp.h"
80#include "agr.h"
81
82#include <sys/sysctl.h>
83#include <sys/mbuf.h>
84#include <sys/mutex.h>
85#include <sys/ioctl.h>
86#include <sys/errno.h>
87#include <sys/device.h>
88#include <sys/entropy.h>
89#include <sys/rndsource.h>
90#include <sys/cpu.h>
91#include <sys/kmem.h>
92#include <sys/hook.h>
93
94#include <net/if.h>
95#include <net/netisr.h>
96#include <net/route.h>
97#include <net/if_llc.h>
98#include <net/if_dl.h>
99#include <net/if_types.h>
100#include <net/pktqueue.h>
101
102#include <net/if_media.h>
103#include <dev/mii/mii.h>
104#include <dev/mii/miivar.h>
105
106#if NARP == 0
107/*
108 * XXX there should really be a way to issue this warning from within config(8)
109 */
110#error You have included NETATALK or a pseudo-device in your configuration that depends on the presence of ethernet interfaces, but have no such interfaces configured. Check if you really need pseudo-device bridge, pppoe, vlan or options NETATALK.
111#endif
112
113#include <net/bpf.h>
114
115#include <net/if_ether.h>
116#include <net/if_vlanvar.h>
117
118#if NPPPOE > 0
119#include <net/if_pppoe.h>
120#endif
121
122#if NAGR > 0
123#include <net/ether_slowprotocols.h>
124#include <net/agr/ieee8023ad.h>
125#include <net/agr/if_agrvar.h>
126#endif
127
128#if NBRIDGE > 0
129#include <net/if_bridgevar.h>
130#endif
131
132#include <netinet/in.h>
133#ifdef INET
134#include <netinet/in_var.h>
135#endif
136#include <netinet/if_inarp.h>
137
138#ifdef INET6
139#ifndef INET
140#include <netinet/in.h>
141#endif
142#include <netinet6/in6_var.h>
143#include <netinet6/nd6.h>
144#endif
145
146#include "carp.h"
147#if NCARP > 0
148#include <netinet/ip_carp.h>
149#endif
150
151#ifdef NETATALK
152#include <netatalk/at.h>
153#include <netatalk/at_var.h>
154#include <netatalk/at_extern.h>
155
156#define llc_snap_org_code llc_un.type_snap.org_code
157#define llc_snap_ether_type llc_un.type_snap.ether_type
158
159extern u_char	at_org_code[3];
160extern u_char	aarp_org_code[3];
161#endif /* NETATALK */
162
163#ifdef MPLS
164#include <netmpls/mpls.h>
165#include <netmpls/mpls_var.h>
166#endif
167
168CTASSERT(sizeof(struct ether_addr) == 6);
169CTASSERT(sizeof(struct ether_header) == 14);
170
171#ifdef DIAGNOSTIC
172static struct timeval bigpktppslim_last;
173static int bigpktppslim = 2;	/* XXX */
174static int bigpktpps_count;
175static kmutex_t bigpktpps_lock __cacheline_aligned;
176#endif
177
178const uint8_t etherbroadcastaddr[ETHER_ADDR_LEN] =
179    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
180const uint8_t ethermulticastaddr_slowprotocols[ETHER_ADDR_LEN] =
181    { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 };
182#define senderr(e) { error = (e); goto bad;}
183
184static pktq_rps_hash_func_t ether_pktq_rps_hash_p;
185
186static int ether_output(struct ifnet *, struct mbuf *,
187    const struct sockaddr *, const struct rtentry *);
188
189/*
190 * Ethernet output routine.
191 * Encapsulate a packet of type family for the local net.
192 * Assumes that ifp is actually pointer to ethercom structure.
193 */
194static int
195ether_output(struct ifnet * const ifp0, struct mbuf * const m0,
196    const struct sockaddr * const dst, const struct rtentry *rt)
197{
198	uint8_t esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN];
199	uint16_t etype = 0;
200	int error = 0, hdrcmplt = 0;
201	struct mbuf *m = m0;
202	struct mbuf *mcopy = NULL;
203	struct ether_header *eh;
204	struct ifnet *ifp = ifp0;
205#ifdef INET
206	struct arphdr *ah;
207#endif
208#ifdef NETATALK
209	struct at_ifaddr *aa;
210#endif
211
212#ifdef MBUFTRACE
213	m_claimm(m, ifp->if_mowner);
214#endif
215
216#if NCARP > 0
217	if (ifp->if_type == IFT_CARP) {
218		struct ifaddr *ifa;
219		int s = pserialize_read_enter();
220
221		/* loop back if this is going to the carp interface */
222		if (dst != NULL && ifp0->if_link_state == LINK_STATE_UP &&
223		    (ifa = ifa_ifwithaddr(dst)) != NULL) {
224			if (ifa->ifa_ifp == ifp0) {
225				pserialize_read_exit(s);
226				return looutput(ifp0, m, dst, rt);
227			}
228		}
229		pserialize_read_exit(s);
230
231		ifp = ifp->if_carpdev;
232		/* ac = (struct arpcom *)ifp; */
233
234		if ((ifp0->if_flags & (IFF_UP | IFF_RUNNING)) !=
235		    (IFF_UP | IFF_RUNNING))
236			senderr(ENETDOWN);
237	}
238#endif
239
240	if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) != (IFF_UP | IFF_RUNNING))
241		senderr(ENETDOWN);
242
243	switch (dst->sa_family) {
244
245#ifdef INET
246	case AF_INET:
247		if (m->m_flags & M_BCAST) {
248			memcpy(edst, etherbroadcastaddr, sizeof(edst));
249		} else if (m->m_flags & M_MCAST) {
250			ETHER_MAP_IP_MULTICAST(&satocsin(dst)->sin_addr, edst);
251		} else {
252			error = arpresolve(ifp0, rt, m, dst, edst, sizeof(edst));
253			if (error)
254				return (error == EWOULDBLOCK) ? 0 : error;
255		}
256		/* If broadcasting on a simplex interface, loopback a copy */
257		if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX))
258			mcopy = m_copypacket(m, M_DONTWAIT);
259		etype = htons(ETHERTYPE_IP);
260		break;
261
262	case AF_ARP:
263		ah = mtod(m, struct arphdr *);
264		if (m->m_flags & M_BCAST) {
265			memcpy(edst, etherbroadcastaddr, sizeof(edst));
266		} else {
267			void *tha = ar_tha(ah);
268
269			if (tha == NULL) {
270				/* fake with ARPHRD_IEEE1394 */
271				m_freem(m);
272				return 0;
273			}
274			memcpy(edst, tha, sizeof(edst));
275		}
276
277		ah->ar_hrd = htons(ARPHRD_ETHER);
278
279		switch (ntohs(ah->ar_op)) {
280		case ARPOP_REVREQUEST:
281		case ARPOP_REVREPLY:
282			etype = htons(ETHERTYPE_REVARP);
283			break;
284
285		case ARPOP_REQUEST:
286		case ARPOP_REPLY:
287		default:
288			etype = htons(ETHERTYPE_ARP);
289		}
290		break;
291#endif
292
293#ifdef INET6
294	case AF_INET6:
295		if (m->m_flags & M_BCAST) {
296			memcpy(edst, etherbroadcastaddr, sizeof(edst));
297		} else if (m->m_flags & M_MCAST) {
298			ETHER_MAP_IPV6_MULTICAST(&satocsin6(dst)->sin6_addr,
299			    edst);
300		} else {
301			error = nd6_resolve(ifp0, rt, m, dst, edst,
302			    sizeof(edst));
303			if (error)
304				return (error == EWOULDBLOCK) ? 0 : error;
305		}
306		etype = htons(ETHERTYPE_IPV6);
307		break;
308#endif
309
310#ifdef NETATALK
311	case AF_APPLETALK: {
312		struct ifaddr *ifa;
313		int s;
314
315		KERNEL_LOCK(1, NULL);
316
317		if (!aarpresolve(ifp, m, (const struct sockaddr_at *)dst, edst)) {
318			KERNEL_UNLOCK_ONE(NULL);
319			return 0;
320		}
321
322		/*
323		 * ifaddr is the first thing in at_ifaddr
324		 */
325		s = pserialize_read_enter();
326		ifa = at_ifawithnet((const struct sockaddr_at *)dst, ifp);
327		if (ifa == NULL) {
328			pserialize_read_exit(s);
329			KERNEL_UNLOCK_ONE(NULL);
330			senderr(EADDRNOTAVAIL);
331		}
332		aa = (struct at_ifaddr *)ifa;
333
334		/*
335		 * In the phase 2 case, we need to prepend an mbuf for the
336		 * llc header.
337		 */
338		if (aa->aa_flags & AFA_PHASE2) {
339			struct llc llc;
340
341			M_PREPEND(m, sizeof(struct llc), M_DONTWAIT);
342			if (m == NULL) {
343				pserialize_read_exit(s);
344				KERNEL_UNLOCK_ONE(NULL);
345				senderr(ENOBUFS);
346			}
347
348			llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
349			llc.llc_control = LLC_UI;
350			memcpy(llc.llc_snap_org_code, at_org_code,
351			    sizeof(llc.llc_snap_org_code));
352			llc.llc_snap_ether_type = htons(ETHERTYPE_ATALK);
353			memcpy(mtod(m, void *), &llc, sizeof(struct llc));
354		} else {
355			etype = htons(ETHERTYPE_ATALK);
356		}
357		pserialize_read_exit(s);
358		KERNEL_UNLOCK_ONE(NULL);
359		break;
360	}
361#endif /* NETATALK */
362
363	case pseudo_AF_HDRCMPLT:
364		hdrcmplt = 1;
365		memcpy(esrc,
366		    ((const struct ether_header *)dst->sa_data)->ether_shost,
367		    sizeof(esrc));
368		/* FALLTHROUGH */
369
370	case AF_UNSPEC:
371		memcpy(edst,
372		    ((const struct ether_header *)dst->sa_data)->ether_dhost,
373		    sizeof(edst));
374		/* AF_UNSPEC doesn't swap the byte order of the ether_type. */
375		etype = ((const struct ether_header *)dst->sa_data)->ether_type;
376		break;
377
378	default:
379		printf("%s: can't handle af%d\n", ifp->if_xname,
380		    dst->sa_family);
381		senderr(EAFNOSUPPORT);
382	}
383
384#ifdef MPLS
385	{
386		struct m_tag *mtag;
387		mtag = m_tag_find(m, PACKET_TAG_MPLS);
388		if (mtag != NULL) {
389			/* Having the tag itself indicates it's MPLS */
390			etype = htons(ETHERTYPE_MPLS);
391			m_tag_delete(m, mtag);
392		}
393	}
394#endif
395
396	if (mcopy)
397		(void)looutput(ifp, mcopy, dst, rt);
398
399	KASSERT((m->m_flags & M_PKTHDR) != 0);
400
401	/*
402	 * If no ether type is set, this must be a 802.2 formatted packet.
403	 */
404	if (etype == 0)
405		etype = htons(m->m_pkthdr.len);
406
407	/*
408	 * Add local net header. If no space in first mbuf, allocate another.
409	 */
410	M_PREPEND(m, sizeof(struct ether_header), M_DONTWAIT);
411	if (m == NULL)
412		senderr(ENOBUFS);
413
414	eh = mtod(m, struct ether_header *);
415	/* Note: etype is already in network byte order. */
416	memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type));
417	memcpy(eh->ether_dhost, edst, sizeof(edst));
418	if (hdrcmplt) {
419		memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost));
420	} else {
421	 	memcpy(eh->ether_shost, CLLADDR(ifp->if_sadl),
422		    sizeof(eh->ether_shost));
423	}
424
425#if NCARP > 0
426	if (ifp0 != ifp && ifp0->if_type == IFT_CARP) {
427	 	memcpy(eh->ether_shost, CLLADDR(ifp0->if_sadl),
428		    sizeof(eh->ether_shost));
429	}
430#endif
431
432	if ((error = pfil_run_hooks(ifp->if_pfil, &m, ifp, PFIL_OUT)) != 0)
433		return error;
434	if (m == NULL)
435		return 0;
436
437#if NBRIDGE > 0
438	/*
439	 * Bridges require special output handling.
440	 */
441	if (ifp->if_bridge)
442		return bridge_output(ifp, m, NULL, NULL);
443#endif
444
445#if NCARP > 0
446	if (ifp != ifp0)
447		if_statadd(ifp0, if_obytes, m->m_pkthdr.len + ETHER_HDR_LEN);
448#endif
449
450#ifdef ALTQ
451	KERNEL_LOCK(1, NULL);
452	/*
453	 * If ALTQ is enabled on the parent interface, do
454	 * classification; the queueing discipline might not
455	 * require classification, but might require the
456	 * address family/header pointer in the pktattr.
457	 */
458	if (ALTQ_IS_ENABLED(&ifp->if_snd))
459		altq_etherclassify(&ifp->if_snd, m);
460	KERNEL_UNLOCK_ONE(NULL);
461#endif
462	return ifq_enqueue(ifp, m);
463
464bad:
465	if_statinc(ifp, if_oerrors);
466	if (m)
467		m_freem(m);
468	return error;
469}
470
471#ifdef ALTQ
472/*
473 * This routine is a slight hack to allow a packet to be classified
474 * if the Ethernet headers are present.  It will go away when ALTQ's
475 * classification engine understands link headers.
476 *
477 * XXX: We may need to do m_pullups here. First to ensure struct ether_header
478 * is indeed contiguous, then to read the LLC and so on.
479 */
480void
481altq_etherclassify(struct ifaltq *ifq, struct mbuf *m)
482{
483	struct ether_header *eh;
484	struct mbuf *mtop = m;
485	uint16_t ether_type;
486	int hlen, af, hdrsize;
487	void *hdr;
488
489	KASSERT((mtop->m_flags & M_PKTHDR) != 0);
490
491	hlen = ETHER_HDR_LEN;
492	eh = mtod(m, struct ether_header *);
493
494	ether_type = htons(eh->ether_type);
495
496	if (ether_type < ETHERMTU) {
497		/* LLC/SNAP */
498		struct llc *llc = (struct llc *)(eh + 1);
499		hlen += 8;
500
501		if (m->m_len < hlen ||
502		    llc->llc_dsap != LLC_SNAP_LSAP ||
503		    llc->llc_ssap != LLC_SNAP_LSAP ||
504		    llc->llc_control != LLC_UI) {
505			/* Not SNAP. */
506			goto bad;
507		}
508
509		ether_type = htons(llc->llc_un.type_snap.ether_type);
510	}
511
512	switch (ether_type) {
513	case ETHERTYPE_IP:
514		af = AF_INET;
515		hdrsize = 20;		/* sizeof(struct ip) */
516		break;
517
518	case ETHERTYPE_IPV6:
519		af = AF_INET6;
520		hdrsize = 40;		/* sizeof(struct ip6_hdr) */
521		break;
522
523	default:
524		af = AF_UNSPEC;
525		hdrsize = 0;
526		break;
527	}
528
529	while (m->m_len <= hlen) {
530		hlen -= m->m_len;
531		m = m->m_next;
532		if (m == NULL)
533			goto bad;
534	}
535
536	if (m->m_len < (hlen + hdrsize)) {
537		/*
538		 * protocol header not in a single mbuf.
539		 * We can't cope with this situation right
540		 * now (but it shouldn't ever happen, really, anyhow).
541		 */
542#ifdef DEBUG
543		printf("altq_etherclassify: headers span multiple mbufs: "
544		    "%d < %d\n", m->m_len, (hlen + hdrsize));
545#endif
546		goto bad;
547	}
548
549	m->m_data += hlen;
550	m->m_len -= hlen;
551
552	hdr = mtod(m, void *);
553
554	if (ALTQ_NEEDS_CLASSIFY(ifq)) {
555		mtop->m_pkthdr.pattr_class =
556		    (*ifq->altq_classify)(ifq->altq_clfier, m, af);
557	}
558	mtop->m_pkthdr.pattr_af = af;
559	mtop->m_pkthdr.pattr_hdr = hdr;
560
561	m->m_data -= hlen;
562	m->m_len += hlen;
563
564	return;
565
566bad:
567	mtop->m_pkthdr.pattr_class = NULL;
568	mtop->m_pkthdr.pattr_hdr = NULL;
569	mtop->m_pkthdr.pattr_af = AF_UNSPEC;
570}
571#endif /* ALTQ */
572
573#if defined (LLC) || defined (NETATALK)
574static void
575ether_input_llc(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh)
576{
577	struct ifqueue *inq = NULL;
578	int isr = 0;
579	struct llc *l;
580
581	if (m->m_len < sizeof(*eh) + sizeof(struct llc))
582		goto error;
583
584	l = (struct llc *)(eh+1);
585	switch (l->llc_dsap) {
586#ifdef NETATALK
587	case LLC_SNAP_LSAP:
588		switch (l->llc_control) {
589		case LLC_UI:
590			if (l->llc_ssap != LLC_SNAP_LSAP)
591				goto error;
592
593			if (memcmp(&(l->llc_snap_org_code)[0],
594			    at_org_code, sizeof(at_org_code)) == 0 &&
595			    ntohs(l->llc_snap_ether_type) ==
596			    ETHERTYPE_ATALK) {
597				inq = &atintrq2;
598				m_adj(m, sizeof(struct ether_header)
599				    + sizeof(struct llc));
600				isr = NETISR_ATALK;
601				break;
602			}
603
604			if (memcmp(&(l->llc_snap_org_code)[0],
605			    aarp_org_code,
606			    sizeof(aarp_org_code)) == 0 &&
607			    ntohs(l->llc_snap_ether_type) ==
608			    ETHERTYPE_AARP) {
609				m_adj(m, sizeof(struct ether_header)
610				    + sizeof(struct llc));
611				aarpinput(ifp, m); /* XXX queue? */
612				return;
613			}
614
615		default:
616			goto error;
617		}
618		break;
619#endif
620	default:
621		goto noproto;
622	}
623
624	KASSERT(inq != NULL);
625	IFQ_ENQUEUE_ISR(inq, m, isr);
626	return;
627
628noproto:
629	m_freem(m);
630	if_statinc(ifp, if_noproto);
631	return;
632error:
633	m_freem(m);
634	if_statinc(ifp, if_ierrors);
635	return;
636}
637#endif /* defined (LLC) || defined (NETATALK) */
638
639/*
640 * Process a received Ethernet packet;
641 * the packet is in the mbuf chain m with
642 * the ether header.
643 */
644void
645ether_input(struct ifnet *ifp, struct mbuf *m)
646{
647	struct ethercom *ec = (struct ethercom *) ifp;
648	pktqueue_t *pktq = NULL;
649	struct ifqueue *inq = NULL;
650	uint16_t etype;
651	struct ether_header *eh;
652	size_t ehlen;
653	static int earlypkts;
654	int isr = 0;
655
656	KASSERT(!cpu_intr_p());
657	KASSERT((m->m_flags & M_PKTHDR) != 0);
658
659	if ((ifp->if_flags & IFF_UP) == 0)
660		goto drop;
661
662#ifdef MBUFTRACE
663	m_claimm(m, &ec->ec_rx_mowner);
664#endif
665
666	if (__predict_false(m->m_len < sizeof(*eh))) {
667		if ((m = m_pullup(m, sizeof(*eh))) == NULL) {
668			if_statinc(ifp, if_ierrors);
669			return;
670		}
671	}
672
673	eh = mtod(m, struct ether_header *);
674	etype = ntohs(eh->ether_type);
675	ehlen = sizeof(*eh);
676
677	if (__predict_false(earlypkts < 100 ||
678		entropy_epoch() == (unsigned)-1)) {
679		rnd_add_data(NULL, eh, ehlen, 0);
680		earlypkts++;
681	}
682
683	/*
684	 * Determine if the packet is within its size limits. For MPLS the
685	 * header length is variable, so we skip the check.
686	 */
687	if (etype != ETHERTYPE_MPLS && m->m_pkthdr.len >
688	    ETHER_MAX_FRAME(ifp, etype, m->m_flags & M_HASFCS)) {
689#ifdef DIAGNOSTIC
690		mutex_enter(&bigpktpps_lock);
691		if (ppsratecheck(&bigpktppslim_last, &bigpktpps_count,
692		    bigpktppslim)) {
693			printf("%s: discarding oversize frame (len=%d)\n",
694			    ifp->if_xname, m->m_pkthdr.len);
695		}
696		mutex_exit(&bigpktpps_lock);
697#endif
698		goto error;
699	}
700
701	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
702		/*
703		 * If this is not a simplex interface, drop the packet
704		 * if it came from us.
705		 */
706		if ((ifp->if_flags & IFF_SIMPLEX) == 0 &&
707		    memcmp(CLLADDR(ifp->if_sadl), eh->ether_shost,
708		    ETHER_ADDR_LEN) == 0) {
709			goto drop;
710		}
711
712		if (memcmp(etherbroadcastaddr,
713		    eh->ether_dhost, ETHER_ADDR_LEN) == 0)
714			m->m_flags |= M_BCAST;
715		else
716			m->m_flags |= M_MCAST;
717		if_statinc(ifp, if_imcasts);
718	}
719
720	/* If the CRC is still on the packet, trim it off. */
721	if (m->m_flags & M_HASFCS) {
722		m_adj(m, -ETHER_CRC_LEN);
723		m->m_flags &= ~M_HASFCS;
724	}
725
726	if_statadd(ifp, if_ibytes, m->m_pkthdr.len);
727
728	if (!vlan_has_tag(m) && etype == ETHERTYPE_VLAN) {
729		m = ether_strip_vlantag(m);
730		if (m == NULL) {
731			if_statinc(ifp, if_ierrors);
732			return;
733		}
734
735		eh = mtod(m, struct ether_header *);
736		etype = ntohs(eh->ether_type);
737		ehlen = sizeof(*eh);
738	}
739
740	if ((m->m_flags & (M_BCAST | M_MCAST | M_PROMISC)) == 0 &&
741	    (ifp->if_flags & IFF_PROMISC) != 0 &&
742	    memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
743	     ETHER_ADDR_LEN) != 0) {
744		m->m_flags |= M_PROMISC;
745	}
746
747	if ((m->m_flags & M_PROMISC) == 0) {
748		if (pfil_run_hooks(ifp->if_pfil, &m, ifp, PFIL_IN) != 0)
749			return;
750		if (m == NULL)
751			return;
752
753		eh = mtod(m, struct ether_header *);
754		etype = ntohs(eh->ether_type);
755	}
756
757	/*
758	 * Processing a logical interfaces that are able
759	 * to configure vlan(4).
760	*/
761#if NAGR > 0
762	if (ifp->if_lagg != NULL &&
763	    __predict_true(etype != ETHERTYPE_SLOWPROTOCOLS)) {
764		m->m_flags &= ~M_PROMISC;
765		agr_input(ifp, m);
766		return;
767	}
768#endif
769
770	/*
771	 * VLAN processing.
772	 *
773	 * VLAN provides service delimiting so the frames are
774	 * processed before other handlings. If a VLAN interface
775	 * does not exist to take those frames, they're returned
776	 * to ether_input().
777	 */
778
779	if (vlan_has_tag(m)) {
780		if (EVL_VLANOFTAG(vlan_get_tag(m)) == 0) {
781			if (etype == ETHERTYPE_VLAN ||
782			     etype == ETHERTYPE_QINQ)
783				goto drop;
784
785			/* XXX we should actually use the prio value? */
786			m->m_flags &= ~M_VLANTAG;
787		} else {
788#if NVLAN > 0
789			if (ec->ec_nvlans > 0) {
790				m = vlan_input(ifp, m);
791
792				/* vlan_input() called ether_input() recursively */
793				if (m == NULL)
794					return;
795			}
796#endif
797			/* drop VLAN frames not for this port. */
798			goto noproto;
799		}
800	}
801
802#if NCARP > 0
803	if (__predict_false(ifp->if_carp && ifp->if_type != IFT_CARP)) {
804		/*
805		 * Clear M_PROMISC, in case the packet comes from a
806		 * vlan.
807		 */
808		m->m_flags &= ~M_PROMISC;
809		if (carp_input(m, (uint8_t *)&eh->ether_shost,
810		    (uint8_t *)&eh->ether_dhost, eh->ether_type) == 0)
811			return;
812	}
813#endif
814
815	/*
816	 * Handle protocols that expect to have the Ethernet header
817	 * (and possibly FCS) intact.
818	 */
819	switch (etype) {
820#if NPPPOE > 0
821	case ETHERTYPE_PPPOEDISC:
822		pppoedisc_input(ifp, m);
823		return;
824
825	case ETHERTYPE_PPPOE:
826		pppoe_input(ifp, m);
827		return;
828#endif
829
830	case ETHERTYPE_SLOWPROTOCOLS: {
831		uint8_t subtype;
832
833		if (m->m_pkthdr.len < sizeof(*eh) + sizeof(subtype))
834			goto error;
835
836		m_copydata(m, sizeof(*eh), sizeof(subtype), &subtype);
837		switch (subtype) {
838#if NAGR > 0
839		case SLOWPROTOCOLS_SUBTYPE_LACP:
840			if (ifp->if_lagg != NULL) {
841				ieee8023ad_lacp_input(ifp, m);
842				return;
843			}
844			break;
845
846		case SLOWPROTOCOLS_SUBTYPE_MARKER:
847			if (ifp->if_lagg != NULL) {
848				ieee8023ad_marker_input(ifp, m);
849				return;
850			}
851			break;
852#endif
853
854		default:
855			if (subtype == 0 || subtype > 10) {
856				/* illegal value */
857				goto error;
858			}
859			/* unknown subtype */
860			break;
861		}
862	}
863	/* FALLTHROUGH */
864	default:
865		if (m->m_flags & M_PROMISC)
866			goto drop;
867	}
868
869	/* If the CRC is still on the packet, trim it off. */
870	if (m->m_flags & M_HASFCS) {
871		m_adj(m, -ETHER_CRC_LEN);
872		m->m_flags &= ~M_HASFCS;
873	}
874
875	/* etype represents the size of the payload in this case */
876	if (etype <= ETHERMTU + sizeof(struct ether_header)) {
877		KASSERT(ehlen == sizeof(*eh));
878#if defined (LLC) || defined (NETATALK)
879		ether_input_llc(ifp, m, eh);
880		return;
881#else
882		/* ethertype of 0-1500 is regarded as noproto */
883		goto noproto;
884#endif
885	}
886
887	/* Strip off the Ethernet header. */
888	m_adj(m, ehlen);
889
890	switch (etype) {
891#ifdef INET
892	case ETHERTYPE_IP:
893#ifdef GATEWAY
894		if (ipflow_fastforward(m))
895			return;
896#endif
897		pktq = ip_pktq;
898		break;
899
900	case ETHERTYPE_ARP:
901		isr = NETISR_ARP;
902		inq = &arpintrq;
903		break;
904
905	case ETHERTYPE_REVARP:
906		revarpinput(m);	/* XXX queue? */
907		return;
908#endif
909
910#ifdef INET6
911	case ETHERTYPE_IPV6:
912		if (__predict_false(!in6_present))
913			goto noproto;
914#ifdef GATEWAY
915		if (ip6flow_fastforward(&m))
916			return;
917#endif
918		pktq = ip6_pktq;
919		break;
920#endif
921
922#ifdef NETATALK
923	case ETHERTYPE_ATALK:
924		isr = NETISR_ATALK;
925		inq = &atintrq1;
926		break;
927
928	case ETHERTYPE_AARP:
929		aarpinput(ifp, m); /* XXX queue? */
930		return;
931#endif
932
933#ifdef MPLS
934	case ETHERTYPE_MPLS:
935		isr = NETISR_MPLS;
936		inq = &mplsintrq;
937		break;
938#endif
939
940	default:
941		goto noproto;
942	}
943
944	if (__predict_true(pktq)) {
945		const uint32_t h = pktq_rps_hash(&ether_pktq_rps_hash_p, m);
946		if (__predict_false(!pktq_enqueue(pktq, m, h))) {
947			m_freem(m);
948		}
949		return;
950	}
951
952	if (__predict_false(!inq)) {
953		/* Should not happen. */
954		goto error;
955	}
956
957	IFQ_ENQUEUE_ISR(inq, m, isr);
958	return;
959
960drop:
961	m_freem(m);
962	if_statinc(ifp, if_iqdrops);
963	return;
964noproto:
965	m_freem(m);
966	if_statinc(ifp, if_noproto);
967	return;
968error:
969	m_freem(m);
970	if_statinc(ifp, if_ierrors);
971	return;
972}
973
974static void
975ether_bpf_mtap(struct bpf_if *bp, struct mbuf *m, u_int direction)
976{
977	struct ether_vlan_header evl;
978	struct m_hdr mh, md;
979
980	KASSERT(bp != NULL);
981
982	if (!vlan_has_tag(m)) {
983		bpf_mtap3(bp, m, direction);
984		return;
985	}
986
987	memcpy(&evl, mtod(m, char *), ETHER_HDR_LEN);
988	evl.evl_proto = evl.evl_encap_proto;
989	evl.evl_encap_proto = htons(ETHERTYPE_VLAN);
990	evl.evl_tag = htons(vlan_get_tag(m));
991
992	md.mh_flags = 0;
993	md.mh_data = m->m_data + ETHER_HDR_LEN;
994	md.mh_len = m->m_len - ETHER_HDR_LEN;
995	md.mh_next = m->m_next;
996
997	mh.mh_flags = 0;
998	mh.mh_data = (char *)&evl;
999	mh.mh_len = sizeof(evl);
1000	mh.mh_next = (struct mbuf *)&md;
1001
1002	bpf_mtap3(bp, (struct mbuf *)&mh, direction);
1003}
1004
1005/*
1006 * Convert Ethernet address to printable (loggable) representation.
1007 */
1008char *
1009ether_sprintf(const u_char *ap)
1010{
1011	static char etherbuf[3 * ETHER_ADDR_LEN];
1012	return ether_snprintf(etherbuf, sizeof(etherbuf), ap);
1013}
1014
1015char *
1016ether_snprintf(char *buf, size_t len, const u_char *ap)
1017{
1018	char *cp = buf;
1019	size_t i;
1020
1021	for (i = 0; i < len / 3; i++) {
1022		*cp++ = hexdigits[*ap >> 4];
1023		*cp++ = hexdigits[*ap++ & 0xf];
1024		*cp++ = ':';
1025	}
1026	*--cp = '\0';
1027	return buf;
1028}
1029
1030/*
1031 * Perform common duties while attaching to interface list
1032 */
1033void
1034ether_ifattach(struct ifnet *ifp, const uint8_t *lla)
1035{
1036	struct ethercom *ec = (struct ethercom *)ifp;
1037	char xnamebuf[HOOKNAMSIZ];
1038
1039	ifp->if_type = IFT_ETHER;
1040	ifp->if_hdrlen = ETHER_HDR_LEN;
1041	ifp->if_dlt = DLT_EN10MB;
1042	ifp->if_mtu = ETHERMTU;
1043	ifp->if_output = ether_output;
1044	ifp->_if_input = ether_input;
1045	ifp->if_bpf_mtap = ether_bpf_mtap;
1046	if (ifp->if_baudrate == 0)
1047		ifp->if_baudrate = IF_Mbps(10);		/* just a default */
1048
1049	if (lla != NULL)
1050		if_set_sadl(ifp, lla, ETHER_ADDR_LEN, !ETHER_IS_LOCAL(lla));
1051
1052	LIST_INIT(&ec->ec_multiaddrs);
1053	SIMPLEQ_INIT(&ec->ec_vids);
1054	ec->ec_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
1055	ec->ec_flags = 0;
1056	ifp->if_broadcastaddr = etherbroadcastaddr;
1057	bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header));
1058	snprintf(xnamebuf, sizeof(xnamebuf),
1059	    "%s-ether_ifdetachhooks", ifp->if_xname);
1060	ec->ec_ifdetach_hooks = simplehook_create(IPL_NET, xnamebuf);
1061#ifdef MBUFTRACE
1062	mowner_init_owner(&ec->ec_tx_mowner, ifp->if_xname, "tx");
1063	mowner_init_owner(&ec->ec_rx_mowner, ifp->if_xname, "rx");
1064	MOWNER_ATTACH(&ec->ec_tx_mowner);
1065	MOWNER_ATTACH(&ec->ec_rx_mowner);
1066	ifp->if_mowner = &ec->ec_tx_mowner;
1067#endif
1068}
1069
1070void
1071ether_ifdetach(struct ifnet *ifp)
1072{
1073	struct ethercom *ec = (void *) ifp;
1074	struct ether_multi *enm;
1075
1076	IFNET_ASSERT_UNLOCKED(ifp);
1077	/*
1078	 * Prevent further calls to ioctl (for example turning off
1079	 * promiscuous mode from the bridge code), which eventually can
1080	 * call if_init() which can cause panics because the interface
1081	 * is in the process of being detached. Return device not configured
1082	 * instead.
1083	 */
1084	ifp->if_ioctl = __FPTRCAST(int (*)(struct ifnet *, u_long, void *),
1085	    enxio);
1086
1087	simplehook_dohooks(ec->ec_ifdetach_hooks);
1088	KASSERT(!simplehook_has_hooks(ec->ec_ifdetach_hooks));
1089	simplehook_destroy(ec->ec_ifdetach_hooks);
1090
1091	bpf_detach(ifp);
1092
1093	ETHER_LOCK(ec);
1094	KASSERT(ec->ec_nvlans == 0);
1095	while ((enm = LIST_FIRST(&ec->ec_multiaddrs)) != NULL) {
1096		LIST_REMOVE(enm, enm_list);
1097		kmem_free(enm, sizeof(*enm));
1098		ec->ec_multicnt--;
1099	}
1100	ETHER_UNLOCK(ec);
1101
1102	mutex_obj_free(ec->ec_lock);
1103	ec->ec_lock = NULL;
1104
1105	ifp->if_mowner = NULL;
1106	MOWNER_DETACH(&ec->ec_rx_mowner);
1107	MOWNER_DETACH(&ec->ec_tx_mowner);
1108}
1109
1110void *
1111ether_ifdetachhook_establish(struct ifnet *ifp,
1112    void (*fn)(void *), void *arg)
1113{
1114	struct ethercom *ec;
1115	khook_t *hk;
1116
1117	if (ifp->if_type != IFT_ETHER)
1118		return NULL;
1119
1120	ec = (struct ethercom *)ifp;
1121	hk = simplehook_establish(ec->ec_ifdetach_hooks,
1122	    fn, arg);
1123
1124	return (void *)hk;
1125}
1126
1127void
1128ether_ifdetachhook_disestablish(struct ifnet *ifp,
1129    void *vhook, kmutex_t *lock)
1130{
1131	struct ethercom *ec;
1132
1133	if (vhook == NULL)
1134		return;
1135
1136	ec = (struct ethercom *)ifp;
1137	simplehook_disestablish(ec->ec_ifdetach_hooks, vhook, lock);
1138}
1139
1140#if 0
1141/*
1142 * This is for reference.  We have a table-driven version
1143 * of the little-endian crc32 generator, which is faster
1144 * than the double-loop.
1145 */
1146uint32_t
1147ether_crc32_le(const uint8_t *buf, size_t len)
1148{
1149	uint32_t c, crc, carry;
1150	size_t i, j;
1151
1152	crc = 0xffffffffU;	/* initial value */
1153
1154	for (i = 0; i < len; i++) {
1155		c = buf[i];
1156		for (j = 0; j < 8; j++) {
1157			carry = ((crc & 0x01) ? 1 : 0) ^ (c & 0x01);
1158			crc >>= 1;
1159			c >>= 1;
1160			if (carry)
1161				crc = (crc ^ ETHER_CRC_POLY_LE);
1162		}
1163	}
1164
1165	return (crc);
1166}
1167#else
1168uint32_t
1169ether_crc32_le(const uint8_t *buf, size_t len)
1170{
1171	static const uint32_t crctab[] = {
1172		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
1173		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
1174		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
1175		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
1176	};
1177	uint32_t crc;
1178	size_t i;
1179
1180	crc = 0xffffffffU;	/* initial value */
1181
1182	for (i = 0; i < len; i++) {
1183		crc ^= buf[i];
1184		crc = (crc >> 4) ^ crctab[crc & 0xf];
1185		crc = (crc >> 4) ^ crctab[crc & 0xf];
1186	}
1187
1188	return (crc);
1189}
1190#endif
1191
1192uint32_t
1193ether_crc32_be(const uint8_t *buf, size_t len)
1194{
1195	uint32_t c, crc, carry;
1196	size_t i, j;
1197
1198	crc = 0xffffffffU;	/* initial value */
1199
1200	for (i = 0; i < len; i++) {
1201		c = buf[i];
1202		for (j = 0; j < 8; j++) {
1203			carry = ((crc & 0x80000000U) ? 1 : 0) ^ (c & 0x01);
1204			crc <<= 1;
1205			c >>= 1;
1206			if (carry)
1207				crc = (crc ^ ETHER_CRC_POLY_BE) | carry;
1208		}
1209	}
1210
1211	return (crc);
1212}
1213
1214#ifdef INET
1215const uint8_t ether_ipmulticast_min[ETHER_ADDR_LEN] =
1216    { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 };
1217const uint8_t ether_ipmulticast_max[ETHER_ADDR_LEN] =
1218    { 0x01, 0x00, 0x5e, 0x7f, 0xff, 0xff };
1219#endif
1220#ifdef INET6
1221const uint8_t ether_ip6multicast_min[ETHER_ADDR_LEN] =
1222    { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 };
1223const uint8_t ether_ip6multicast_max[ETHER_ADDR_LEN] =
1224    { 0x33, 0x33, 0xff, 0xff, 0xff, 0xff };
1225#endif
1226
1227/*
1228 * ether_aton implementation, not using a static buffer.
1229 */
1230int
1231ether_aton_r(u_char *dest, size_t len, const char *str)
1232{
1233	const u_char *cp = (const void *)str;
1234	u_char *ep;
1235
1236#define atox(c)	(((c) <= '9') ? ((c) - '0') : ((toupper(c) - 'A') + 10))
1237
1238	if (len < ETHER_ADDR_LEN)
1239		return ENOSPC;
1240
1241	ep = dest + ETHER_ADDR_LEN;
1242
1243	while (*cp) {
1244		if (!isxdigit(*cp))
1245			return EINVAL;
1246
1247		*dest = atox(*cp);
1248		cp++;
1249		if (isxdigit(*cp)) {
1250			*dest = (*dest << 4) | atox(*cp);
1251			cp++;
1252		}
1253		dest++;
1254
1255		if (dest == ep)
1256			return (*cp == '\0') ? 0 : ENAMETOOLONG;
1257
1258		switch (*cp) {
1259		case ':':
1260		case '-':
1261		case '.':
1262			cp++;
1263			break;
1264		}
1265	}
1266	return ENOBUFS;
1267}
1268
1269/*
1270 * Convert a sockaddr into an Ethernet address or range of Ethernet
1271 * addresses.
1272 */
1273int
1274ether_multiaddr(const struct sockaddr *sa, uint8_t addrlo[ETHER_ADDR_LEN],
1275    uint8_t addrhi[ETHER_ADDR_LEN])
1276{
1277#ifdef INET
1278	const struct sockaddr_in *sin;
1279#endif
1280#ifdef INET6
1281	const struct sockaddr_in6 *sin6;
1282#endif
1283
1284	switch (sa->sa_family) {
1285
1286	case AF_UNSPEC:
1287		memcpy(addrlo, sa->sa_data, ETHER_ADDR_LEN);
1288		memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
1289		break;
1290
1291#ifdef INET
1292	case AF_INET:
1293		sin = satocsin(sa);
1294		if (sin->sin_addr.s_addr == INADDR_ANY) {
1295			/*
1296			 * An IP address of INADDR_ANY means listen to
1297			 * or stop listening to all of the Ethernet
1298			 * multicast addresses used for IP.
1299			 * (This is for the sake of IP multicast routers.)
1300			 */
1301			memcpy(addrlo, ether_ipmulticast_min, ETHER_ADDR_LEN);
1302			memcpy(addrhi, ether_ipmulticast_max, ETHER_ADDR_LEN);
1303		} else {
1304			ETHER_MAP_IP_MULTICAST(&sin->sin_addr, addrlo);
1305			memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
1306		}
1307		break;
1308#endif
1309#ifdef INET6
1310	case AF_INET6:
1311		sin6 = satocsin6(sa);
1312		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1313			/*
1314			 * An IP6 address of 0 means listen to or stop
1315			 * listening to all of the Ethernet multicast
1316			 * address used for IP6.
1317			 * (This is used for multicast routers.)
1318			 */
1319			memcpy(addrlo, ether_ip6multicast_min, ETHER_ADDR_LEN);
1320			memcpy(addrhi, ether_ip6multicast_max, ETHER_ADDR_LEN);
1321		} else {
1322			ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, addrlo);
1323			memcpy(addrhi, addrlo, ETHER_ADDR_LEN);
1324		}
1325		break;
1326#endif
1327
1328	default:
1329		return EAFNOSUPPORT;
1330	}
1331	return 0;
1332}
1333
1334/*
1335 * Add an Ethernet multicast address or range of addresses to the list for a
1336 * given interface.
1337 */
1338int
1339ether_addmulti(const struct sockaddr *sa, struct ethercom *ec)
1340{
1341	struct ether_multi *enm, *_enm;
1342	u_char addrlo[ETHER_ADDR_LEN];
1343	u_char addrhi[ETHER_ADDR_LEN];
1344	int error = 0;
1345
1346	/* Allocate out of lock */
1347	enm = kmem_alloc(sizeof(*enm), KM_SLEEP);
1348
1349	ETHER_LOCK(ec);
1350	error = ether_multiaddr(sa, addrlo, addrhi);
1351	if (error != 0)
1352		goto out;
1353
1354	/*
1355	 * Verify that we have valid Ethernet multicast addresses.
1356	 */
1357	if (!ETHER_IS_MULTICAST(addrlo) || !ETHER_IS_MULTICAST(addrhi)) {
1358		error = EINVAL;
1359		goto out;
1360	}
1361
1362	/*
1363	 * See if the address range is already in the list.
1364	 */
1365	_enm = ether_lookup_multi(addrlo, addrhi, ec);
1366	if (_enm != NULL) {
1367		/*
1368		 * Found it; just increment the reference count.
1369		 */
1370		++_enm->enm_refcount;
1371		error = 0;
1372		goto out;
1373	}
1374
1375	/*
1376	 * Link a new multicast record into the interface's multicast list.
1377	 */
1378	memcpy(enm->enm_addrlo, addrlo, ETHER_ADDR_LEN);
1379	memcpy(enm->enm_addrhi, addrhi, ETHER_ADDR_LEN);
1380	enm->enm_refcount = 1;
1381	LIST_INSERT_HEAD(&ec->ec_multiaddrs, enm, enm_list);
1382	ec->ec_multicnt++;
1383
1384	/*
1385	 * Return ENETRESET to inform the driver that the list has changed
1386	 * and its reception filter should be adjusted accordingly.
1387	 */
1388	error = ENETRESET;
1389	enm = NULL;
1390
1391out:
1392	ETHER_UNLOCK(ec);
1393	if (enm != NULL)
1394		kmem_free(enm, sizeof(*enm));
1395	return error;
1396}
1397
1398/*
1399 * Delete a multicast address record.
1400 */
1401int
1402ether_delmulti(const struct sockaddr *sa, struct ethercom *ec)
1403{
1404	struct ether_multi *enm;
1405	u_char addrlo[ETHER_ADDR_LEN];
1406	u_char addrhi[ETHER_ADDR_LEN];
1407	int error;
1408
1409	ETHER_LOCK(ec);
1410	error = ether_multiaddr(sa, addrlo, addrhi);
1411	if (error != 0)
1412		goto error;
1413
1414	/*
1415	 * Look up the address in our list.
1416	 */
1417	enm = ether_lookup_multi(addrlo, addrhi, ec);
1418	if (enm == NULL) {
1419		error = ENXIO;
1420		goto error;
1421	}
1422	if (--enm->enm_refcount != 0) {
1423		/*
1424		 * Still some claims to this record.
1425		 */
1426		error = 0;
1427		goto error;
1428	}
1429
1430	/*
1431	 * No remaining claims to this record; unlink and free it.
1432	 */
1433	LIST_REMOVE(enm, enm_list);
1434	ec->ec_multicnt--;
1435	ETHER_UNLOCK(ec);
1436	kmem_free(enm, sizeof(*enm));
1437
1438	/*
1439	 * Return ENETRESET to inform the driver that the list has changed
1440	 * and its reception filter should be adjusted accordingly.
1441	 */
1442	return ENETRESET;
1443
1444error:
1445	ETHER_UNLOCK(ec);
1446	return error;
1447}
1448
1449void
1450ether_set_ifflags_cb(struct ethercom *ec, ether_cb_t cb)
1451{
1452	ec->ec_ifflags_cb = cb;
1453}
1454
1455void
1456ether_set_vlan_cb(struct ethercom *ec, ether_vlancb_t cb)
1457{
1458
1459	ec->ec_vlan_cb = cb;
1460}
1461
1462static int
1463ether_ioctl_reinit(struct ethercom *ec)
1464{
1465	struct ifnet *ifp = &ec->ec_if;
1466	int error;
1467
1468	KASSERTMSG(IFNET_LOCKED(ifp), "%s", ifp->if_xname);
1469
1470	switch (ifp->if_flags & (IFF_UP | IFF_RUNNING)) {
1471	case IFF_RUNNING:
1472		/*
1473		 * If interface is marked down and it is running,
1474		 * then stop and disable it.
1475		 */
1476		if_stop(ifp, 1);
1477		break;
1478	case IFF_UP:
1479		/*
1480		 * If interface is marked up and it is stopped, then
1481		 * start it.
1482		 */
1483		return if_init(ifp);
1484	case IFF_UP | IFF_RUNNING:
1485		error = 0;
1486		if (ec->ec_ifflags_cb != NULL) {
1487			error = (*ec->ec_ifflags_cb)(ec);
1488			if (error == ENETRESET) {
1489				/*
1490				 * Reset the interface to pick up
1491				 * changes in any other flags that
1492				 * affect the hardware state.
1493				 */
1494				return if_init(ifp);
1495			}
1496		} else
1497			error = if_init(ifp);
1498		return error;
1499	case 0:
1500		break;
1501	}
1502
1503	return 0;
1504}
1505
1506/*
1507 * Common ioctls for Ethernet interfaces.  Note, we must be
1508 * called at splnet().
1509 */
1510int
1511ether_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1512{
1513	struct ethercom *ec = (void *)ifp;
1514	struct eccapreq *eccr;
1515	struct ifreq *ifr = (struct ifreq *)data;
1516	struct if_laddrreq *iflr = data;
1517	const struct sockaddr_dl *sdl;
1518	static const uint8_t zero[ETHER_ADDR_LEN];
1519	int error;
1520
1521	switch (cmd) {
1522	case SIOCINITIFADDR:
1523	    {
1524		struct ifaddr *ifa = (struct ifaddr *)data;
1525		if (ifa->ifa_addr->sa_family != AF_LINK
1526		    && (ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
1527		       (IFF_UP | IFF_RUNNING)) {
1528			ifp->if_flags |= IFF_UP;
1529			if ((error = if_init(ifp)) != 0)
1530				return error;
1531		}
1532#ifdef INET
1533		if (ifa->ifa_addr->sa_family == AF_INET)
1534			arp_ifinit(ifp, ifa);
1535#endif
1536		return 0;
1537	    }
1538
1539	case SIOCSIFMTU:
1540	    {
1541		int maxmtu;
1542
1543		if (ec->ec_capabilities & ETHERCAP_JUMBO_MTU)
1544			maxmtu = ETHERMTU_JUMBO;
1545		else
1546			maxmtu = ETHERMTU;
1547
1548		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > maxmtu)
1549			return EINVAL;
1550		else if ((error = ifioctl_common(ifp, cmd, data)) != ENETRESET)
1551			return error;
1552		else if (ifp->if_flags & IFF_UP) {
1553			/* Make sure the device notices the MTU change. */
1554			return if_init(ifp);
1555		} else
1556			return 0;
1557	    }
1558
1559	case SIOCSIFFLAGS:
1560		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
1561			return error;
1562		return ether_ioctl_reinit(ec);
1563	case SIOCGIFFLAGS:
1564		error = ifioctl_common(ifp, cmd, data);
1565		if (error == 0) {
1566			/* Set IFF_ALLMULTI for backcompat */
1567			ifr->ifr_flags |= (ec->ec_flags & ETHER_F_ALLMULTI) ?
1568			    IFF_ALLMULTI : 0;
1569		}
1570		return error;
1571	case SIOCGETHERCAP:
1572		eccr = (struct eccapreq *)data;
1573		eccr->eccr_capabilities = ec->ec_capabilities;
1574		eccr->eccr_capenable = ec->ec_capenable;
1575		return 0;
1576	case SIOCSETHERCAP:
1577		eccr = (struct eccapreq *)data;
1578		if ((eccr->eccr_capenable & ~ec->ec_capabilities) != 0)
1579			return EINVAL;
1580		if (eccr->eccr_capenable == ec->ec_capenable)
1581			return 0;
1582#if 0 /* notyet */
1583		ec->ec_capenable = (ec->ec_capenable & ETHERCAP_CANTCHANGE)
1584		    | (eccr->eccr_capenable & ~ETHERCAP_CANTCHANGE);
1585#else
1586		ec->ec_capenable = eccr->eccr_capenable;
1587#endif
1588		return ether_ioctl_reinit(ec);
1589	case SIOCADDMULTI:
1590		return ether_addmulti(ifreq_getaddr(cmd, ifr), ec);
1591	case SIOCDELMULTI:
1592		return ether_delmulti(ifreq_getaddr(cmd, ifr), ec);
1593	case SIOCSIFMEDIA:
1594	case SIOCGIFMEDIA:
1595		if (ec->ec_mii != NULL)
1596			return ifmedia_ioctl(ifp, ifr, &ec->ec_mii->mii_media,
1597			    cmd);
1598		else if (ec->ec_ifmedia != NULL)
1599			return ifmedia_ioctl(ifp, ifr, ec->ec_ifmedia, cmd);
1600		else
1601			return ENOTTY;
1602		break;
1603	case SIOCALIFADDR:
1604		sdl = satocsdl(sstocsa(&iflr->addr));
1605		if (sdl->sdl_family != AF_LINK)
1606			;
1607		else if (ETHER_IS_MULTICAST(CLLADDR(sdl)))
1608			return EINVAL;
1609		else if (memcmp(zero, CLLADDR(sdl), sizeof(zero)) == 0)
1610			return EINVAL;
1611		/*FALLTHROUGH*/
1612	default:
1613		return ifioctl_common(ifp, cmd, data);
1614	}
1615	return 0;
1616}
1617
1618/*
1619 * Enable/disable passing VLAN packets if the parent interface supports it.
1620 * Return:
1621 * 	 0: Ok
1622 *	-1: Parent interface does not support vlans
1623 *	>0: Error
1624 */
1625int
1626ether_enable_vlan_mtu(struct ifnet *ifp)
1627{
1628	int error;
1629	struct ethercom *ec = (void *)ifp;
1630
1631	/* Parent does not support VLAN's */
1632	if ((ec->ec_capabilities & ETHERCAP_VLAN_MTU) == 0)
1633		return -1;
1634
1635	/*
1636	 * Parent supports the VLAN_MTU capability,
1637	 * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames;
1638	 * enable it.
1639	 */
1640	ec->ec_capenable |= ETHERCAP_VLAN_MTU;
1641
1642	/* Interface is down, defer for later */
1643	if ((ifp->if_flags & IFF_UP) == 0)
1644		return 0;
1645
1646	if ((error = if_flags_set(ifp, ifp->if_flags)) == 0)
1647		return 0;
1648
1649	ec->ec_capenable &= ~ETHERCAP_VLAN_MTU;
1650	return error;
1651}
1652
1653int
1654ether_disable_vlan_mtu(struct ifnet *ifp)
1655{
1656	int error;
1657	struct ethercom *ec = (void *)ifp;
1658
1659	/* We still have VLAN's, defer for later */
1660	if (ec->ec_nvlans != 0)
1661		return 0;
1662
1663	/* Parent does not support VLAB's, nothing to do. */
1664	if ((ec->ec_capenable & ETHERCAP_VLAN_MTU) == 0)
1665		return -1;
1666
1667	/*
1668	 * Disable Tx/Rx of VLAN-sized frames.
1669	 */
1670	ec->ec_capenable &= ~ETHERCAP_VLAN_MTU;
1671
1672	/* Interface is down, defer for later */
1673	if ((ifp->if_flags & IFF_UP) == 0)
1674		return 0;
1675
1676	if ((error = if_flags_set(ifp, ifp->if_flags)) == 0)
1677		return 0;
1678
1679	ec->ec_capenable |= ETHERCAP_VLAN_MTU;
1680	return error;
1681}
1682
1683/*
1684 * Add and delete VLAN TAG
1685 */
1686int
1687ether_add_vlantag(struct ifnet *ifp, uint16_t vtag, bool *vlanmtu_status)
1688{
1689	struct ethercom *ec = (void *)ifp;
1690	struct vlanid_list *vidp;
1691	bool vlanmtu_enabled;
1692	uint16_t vid = EVL_VLANOFTAG(vtag);
1693	int error;
1694
1695	vlanmtu_enabled = false;
1696
1697	/* Add a vid to the list */
1698	vidp = kmem_alloc(sizeof(*vidp), KM_SLEEP);
1699	vidp->vid = vid;
1700
1701	ETHER_LOCK(ec);
1702	ec->ec_nvlans++;
1703	SIMPLEQ_INSERT_TAIL(&ec->ec_vids, vidp, vid_list);
1704	ETHER_UNLOCK(ec);
1705
1706	if (ec->ec_nvlans == 1) {
1707		IFNET_LOCK(ifp);
1708		error = ether_enable_vlan_mtu(ifp);
1709		IFNET_UNLOCK(ifp);
1710
1711		if (error == 0) {
1712			vlanmtu_enabled = true;
1713		} else if (error != -1) {
1714			goto fail;
1715		}
1716	}
1717
1718	if (ec->ec_vlan_cb != NULL) {
1719		error = (*ec->ec_vlan_cb)(ec, vid, true);
1720		if (error != 0)
1721			goto fail;
1722	}
1723
1724	if (vlanmtu_status != NULL)
1725		*vlanmtu_status = vlanmtu_enabled;
1726
1727	return 0;
1728fail:
1729	ETHER_LOCK(ec);
1730	ec->ec_nvlans--;
1731	SIMPLEQ_REMOVE(&ec->ec_vids, vidp, vlanid_list, vid_list);
1732	ETHER_UNLOCK(ec);
1733
1734	if (vlanmtu_enabled) {
1735		IFNET_LOCK(ifp);
1736		(void)ether_disable_vlan_mtu(ifp);
1737		IFNET_UNLOCK(ifp);
1738	}
1739
1740	kmem_free(vidp, sizeof(*vidp));
1741
1742	return error;
1743}
1744
1745int
1746ether_del_vlantag(struct ifnet *ifp, uint16_t vtag)
1747{
1748	struct ethercom *ec = (void *)ifp;
1749	struct vlanid_list *vidp;
1750	uint16_t vid = EVL_VLANOFTAG(vtag);
1751
1752	ETHER_LOCK(ec);
1753	SIMPLEQ_FOREACH(vidp, &ec->ec_vids, vid_list) {
1754		if (vidp->vid == vid) {
1755			SIMPLEQ_REMOVE(&ec->ec_vids, vidp,
1756			    vlanid_list, vid_list);
1757			ec->ec_nvlans--;
1758			break;
1759		}
1760	}
1761	ETHER_UNLOCK(ec);
1762
1763	if (vidp == NULL)
1764		return ENOENT;
1765
1766	if (ec->ec_vlan_cb != NULL) {
1767		(void)(*ec->ec_vlan_cb)(ec, vidp->vid, false);
1768	}
1769
1770	if (ec->ec_nvlans == 0) {
1771		IFNET_LOCK(ifp);
1772		(void)ether_disable_vlan_mtu(ifp);
1773		IFNET_UNLOCK(ifp);
1774	}
1775
1776	kmem_free(vidp, sizeof(*vidp));
1777
1778	return 0;
1779}
1780
1781int
1782ether_inject_vlantag(struct mbuf **mp, uint16_t etype, uint16_t tag)
1783{
1784	static const size_t min_data_len =
1785	    ETHER_MIN_LEN - ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN;
1786	/* Used to pad ethernet frames with < ETHER_MIN_LEN bytes */
1787	static const char vlan_zero_pad_buff[ETHER_MIN_LEN] = { 0 };
1788
1789	struct ether_vlan_header *evl;
1790	struct mbuf *m = *mp;
1791	int error;
1792
1793	error = 0;
1794
1795	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT);
1796	if (m == NULL) {
1797		error = ENOBUFS;
1798		goto out;
1799	}
1800
1801	if (m->m_len < sizeof(*evl)) {
1802		m = m_pullup(m, sizeof(*evl));
1803		if (m == NULL) {
1804			error = ENOBUFS;
1805			goto out;
1806		}
1807	}
1808
1809	/*
1810	 * Transform the Ethernet header into an
1811	 * Ethernet header with 802.1Q encapsulation.
1812	 */
1813	memmove(mtod(m, void *),
1814	    mtod(m, char *) + ETHER_VLAN_ENCAP_LEN,
1815	    sizeof(struct ether_header));
1816	evl = mtod(m, struct ether_vlan_header *);
1817	evl->evl_proto = evl->evl_encap_proto;
1818	evl->evl_encap_proto = htons(etype);
1819	evl->evl_tag = htons(tag);
1820
1821	/*
1822	 * To cater for VLAN-aware layer 2 ethernet
1823	 * switches which may need to strip the tag
1824	 * before forwarding the packet, make sure
1825	 * the packet+tag is at least 68 bytes long.
1826	 * This is necessary because our parent will
1827	 * only pad to 64 bytes (ETHER_MIN_LEN) and
1828	 * some switches will not pad by themselves
1829	 * after deleting a tag.
1830	 */
1831	if (m->m_pkthdr.len < min_data_len) {
1832		m_copyback(m, m->m_pkthdr.len,
1833		    min_data_len - m->m_pkthdr.len,
1834		    vlan_zero_pad_buff);
1835	}
1836
1837	m->m_flags &= ~M_VLANTAG;
1838
1839out:
1840	*mp = m;
1841	return error;
1842}
1843
1844struct mbuf *
1845ether_strip_vlantag(struct mbuf *m)
1846{
1847	struct ether_vlan_header *evl;
1848
1849	if (m->m_len < sizeof(*evl) &&
1850	    (m = m_pullup(m, sizeof(*evl))) == NULL) {
1851		return NULL;
1852	}
1853
1854	if (m_makewritable(&m, 0, sizeof(*evl), M_DONTWAIT)) {
1855		m_freem(m);
1856		return NULL;
1857	}
1858
1859	evl = mtod(m, struct ether_vlan_header *);
1860	KASSERT(ntohs(evl->evl_encap_proto) == ETHERTYPE_VLAN);
1861
1862	vlan_set_tag(m, ntohs(evl->evl_tag));
1863
1864	/*
1865	 * Restore the original ethertype.  We'll remove
1866	 * the encapsulation after we've found the vlan
1867	 * interface corresponding to the tag.
1868	 */
1869	evl->evl_encap_proto = evl->evl_proto;
1870
1871	/*
1872	 * Remove the encapsulation header and append tag.
1873	 * The original header has already been fixed up above.
1874	 */
1875	vlan_set_tag(m, ntohs(evl->evl_tag));
1876	memmove((char *)evl + ETHER_VLAN_ENCAP_LEN, evl,
1877	    offsetof(struct ether_vlan_header, evl_encap_proto));
1878	m_adj(m, ETHER_VLAN_ENCAP_LEN);
1879
1880	return m;
1881}
1882
1883static int
1884ether_multicast_sysctl(SYSCTLFN_ARGS)
1885{
1886	struct ether_multi *enm;
1887	struct ifnet *ifp;
1888	struct ethercom *ec;
1889	int error = 0;
1890	size_t written;
1891	struct psref psref;
1892	int bound;
1893	unsigned int multicnt;
1894	struct ether_multi_sysctl *addrs;
1895	int i;
1896
1897	if (namelen != 1)
1898		return EINVAL;
1899
1900	bound = curlwp_bind();
1901	ifp = if_get_byindex(name[0], &psref);
1902	if (ifp == NULL) {
1903		error = ENODEV;
1904		goto out;
1905	}
1906	if (ifp->if_type != IFT_ETHER) {
1907		if_put(ifp, &psref);
1908		*oldlenp = 0;
1909		goto out;
1910	}
1911	ec = (struct ethercom *)ifp;
1912
1913	if (oldp == NULL) {
1914		if_put(ifp, &psref);
1915		*oldlenp = ec->ec_multicnt * sizeof(*addrs);
1916		goto out;
1917	}
1918
1919	/*
1920	 * ec->ec_lock is a spin mutex so we cannot call sysctl_copyout, which
1921	 * is sleepable, while holding it. Copy data to a local buffer first
1922	 * with the lock taken and then call sysctl_copyout without holding it.
1923	 */
1924retry:
1925	multicnt = ec->ec_multicnt;
1926
1927	if (multicnt == 0) {
1928		if_put(ifp, &psref);
1929		*oldlenp = 0;
1930		goto out;
1931	}
1932
1933	addrs = kmem_zalloc(sizeof(*addrs) * multicnt, KM_SLEEP);
1934
1935	ETHER_LOCK(ec);
1936	if (multicnt != ec->ec_multicnt) {
1937		/* The number of multicast addresses has changed */
1938		ETHER_UNLOCK(ec);
1939		kmem_free(addrs, sizeof(*addrs) * multicnt);
1940		goto retry;
1941	}
1942
1943	i = 0;
1944	LIST_FOREACH(enm, &ec->ec_multiaddrs, enm_list) {
1945		struct ether_multi_sysctl *addr = &addrs[i];
1946		addr->enm_refcount = enm->enm_refcount;
1947		memcpy(addr->enm_addrlo, enm->enm_addrlo, ETHER_ADDR_LEN);
1948		memcpy(addr->enm_addrhi, enm->enm_addrhi, ETHER_ADDR_LEN);
1949		i++;
1950	}
1951	ETHER_UNLOCK(ec);
1952
1953	error = 0;
1954	written = 0;
1955	for (i = 0; i < multicnt; i++) {
1956		struct ether_multi_sysctl *addr = &addrs[i];
1957
1958		if (written + sizeof(*addr) > *oldlenp)
1959			break;
1960		error = sysctl_copyout(l, addr, oldp, sizeof(*addr));
1961		if (error)
1962			break;
1963		written += sizeof(*addr);
1964		oldp = (char *)oldp + sizeof(*addr);
1965	}
1966	kmem_free(addrs, sizeof(*addrs) * multicnt);
1967
1968	if_put(ifp, &psref);
1969
1970	*oldlenp = written;
1971out:
1972	curlwp_bindx(bound);
1973	return error;
1974}
1975
1976static void
1977ether_sysctl_setup(struct sysctllog **clog)
1978{
1979	const struct sysctlnode *rnode = NULL;
1980
1981	sysctl_createv(clog, 0, NULL, &rnode,
1982		       CTLFLAG_PERMANENT,
1983		       CTLTYPE_NODE, "ether",
1984		       SYSCTL_DESCR("Ethernet-specific information"),
1985		       NULL, 0, NULL, 0,
1986		       CTL_NET, CTL_CREATE, CTL_EOL);
1987
1988	sysctl_createv(clog, 0, &rnode, NULL,
1989		       CTLFLAG_PERMANENT,
1990		       CTLTYPE_NODE, "multicast",
1991		       SYSCTL_DESCR("multicast addresses"),
1992		       ether_multicast_sysctl, 0, NULL, 0,
1993		       CTL_CREATE, CTL_EOL);
1994
1995	sysctl_createv(clog, 0, &rnode, NULL,
1996		       CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
1997		       CTLTYPE_STRING, "rps_hash",
1998		       SYSCTL_DESCR("Interface rps hash function control"),
1999		       sysctl_pktq_rps_hash_handler, 0, (void *)&ether_pktq_rps_hash_p,
2000		       PKTQ_RPS_HASH_NAME_LEN,
2001		       CTL_CREATE, CTL_EOL);
2002}
2003
2004void
2005etherinit(void)
2006{
2007
2008#ifdef DIAGNOSTIC
2009	mutex_init(&bigpktpps_lock, MUTEX_DEFAULT, IPL_NET);
2010#endif
2011	ether_pktq_rps_hash_p = pktq_rps_hash_default;
2012	ether_sysctl_setup(NULL);
2013}
2014