/*	$OpenBSD: ifq.c,v 1.35 2019/10/08 04:18:00 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);
void	ifq_bundle_task(void *);

static inline void
ifq_run_start(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_start);
}

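/*
 * ifq_serialize() queues a task against the ifq and, if no other CPU is
 * already running tasks for it, becomes the serialiser: it keeps
 * dequeueing and running tasks until the list is empty.  The task is
 * copied before the mutex is dropped so the original may be re-added
 * while it runs.
 */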
void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

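/*
 * ifq_start() implements transmit mitigation.  Once at least
 * min(if_txmit, ifq_maxlen) packets are queued the start routine is run
 * immediately in the ifq serialiser; below that threshold the
 * ifq_bundle task is scheduled on the softnet taskq so that small
 * bursts of packets can be pushed to the driver in one go.
 */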
void
ifq_start(struct ifqueue *ifq)
{
	if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) {
		task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
		ifq_run_start(ifq);
	} else
		task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
}

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_bundle_task(void *p)
{
	struct ifqueue *ifq = p;

	ifq_run_start(ifq);
}

void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	task_del(ifq->ifq_softnet, &ifq->ifq_bundle);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */

void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;
	task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;
}

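/*
 * ifq_attach() switches an ifq to a new queueing discipline.  Packets
 * held by the old discipline are purged and re-enqueued into the new
 * one under the ifq mutex; anything the new discipline refuses is
 * counted as a qdrop and freed once the mutex has been released.
 */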
void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

	NET_ASSERT_UNLOCKED();
	if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle))
		taskq_barrier(ifq->ifq_softnet);

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

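/*
 * ifq_enqueue() hands an mbuf to the current queueing discipline.  The
 * enqueue op returns NULL on success, the packet itself if it could not
 * be queued, or a different mbuf the discipline dropped to make room;
 * whatever comes back is freed once the mutex has been released, and
 * ENOBUFS is only reported when the new packet itself was rejected.
 */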
int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}

static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

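/*
 * ifq_deq_begin/commit/rollback let a driver peek at the next packet
 * without committing to it, which matters when it may turn out there is
 * no room left on the hardware ring.  ifq_deq_begin() returns the head
 * packet with the ifq mutex held and the discipline's cookie stashed in
 * ph_cookie; ifq_deq_commit() then removes it from the queue, while
 * ifq_deq_rollback() leaves it in place.  Both drop the mutex through
 * ifq_deq_leave(), which also frees anything queued via ifq_mfreem().
 *
 * A driver start routine typically uses it along these lines (a sketch,
 * not code from this file; the ring-full test stands in for whatever
 * check the driver does):
 *
 *	while ((m = ifq_deq_begin(ifq)) != NULL) {
 *		if (tx ring is full) {
 *			ifq_deq_rollback(ifq, m);
 *			ifq_set_oactive(ifq);
 *			break;
 *		}
 *		ifq_deq_commit(ifq, m);
 *		... give m to the hardware ...
 *	}
 */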
struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

int
ifq_hdatalen(struct ifqueue *ifq)
{
	struct mbuf *m;
	int len = 0;

	m = ifq_deq_begin(ifq);
	if (m != NULL) {
		len = m->m_pkthdr.len;
		ifq_deq_rollback(ifq, m);
	}

	return (len);
}

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

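/*
 * ifq_mfreem() and ifq_mfreeml() are for queueing disciplines that have
 * to drop packets while the ifq mutex is already held.  The mbufs are
 * moved onto the ifq_free list and only freed in ifq_deq_leave(), after
 * the mutex has been released.
 */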
void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * ifiq
 */

static void	ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);
	ifiq->ifiq_pressure = 0;

	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
	NET_ASSERT_UNLOCKED();
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

unsigned int ifiq_maxlen_drop = 2048 * 5;
unsigned int ifiq_maxlen_return = 2048 * 3;

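/*
 * ifiq_input() takes a list of packets from a driver's receive path
 * (typically via if_input(), which feeds ifp->if_rcv).  It stamps each
 * mbuf with the interface index and rdomain, gives bpf a look at them,
 * and appends the list to the ifiq backlog before scheduling
 * ifiq_process() on the softnet taskq.  If the backlog has already
 * grown past ifiq_maxlen_drop the whole list is dropped instead, and
 * the return value asks the driver to slow down once the backlog
 * exceeds ifiq_maxlen_return.
 */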
int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
	unsigned int len;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN))
				m_freem(m);
			else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;

	len = ml_len(&ifiq->ifiq_ml);
	if (len > ifiq_maxlen_drop)
		ifiq->ifiq_qdrops += ml_len(ml);
	else
		ml_enlist(&ifiq->ifiq_ml, ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (len > ifiq_maxlen_return);
}

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	mtx_enter(&ifiq->ifiq_mtx);
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

int
net_ifiq_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int error = EOPNOTSUPP;
/* pressure is disabled for 6.6-release */
#if 0
	int val;

	if (namelen != 1)
		return (EISDIR);

	switch (name[0]) {
	case NET_LINK_IFRXQ_PRESSURE_RETURN:
		val = ifiq_pressure_return;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (val < 1 || val > ifiq_pressure_drop)
			return (EINVAL);
		ifiq_pressure_return = val;
		break;
	case NET_LINK_IFRXQ_PRESSURE_DROP:
		val = ifiq_pressure_drop;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (ifiq_pressure_return > val)
			return (EINVAL);
		ifiq_pressure_drop = val;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
#endif

	return (error);
}

/*
 * priq implementation
 */

unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

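/*
 * Dequeue scans the per-priority lists from the highest priority down
 * and returns the head of the first non-empty list.  The list is passed
 * back through the cookie so priq_deq_commit() can remove the packet
 * without searching for it again.
 */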
struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}