ifq.c revision 1.31
/*	$OpenBSD: ifq.c,v 1.31 2019/04/16 04:04:19 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

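/*
 * priq is the default transmit queue discipline.  The ifq_ops entries
 * below hook it into the generic ifqueue code: a packet-to-queue
 * mapping, enqueue, the two-stage dequeue (begin/commit), purge, and
 * allocation/free of the per-queue state.
 */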
const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);
void	ifq_bundle_task(void *);

static inline void
ifq_run_start(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_start);
}

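/*
 * ifq_serialize() guarantees that work queued against an ifqueue runs
 * one task at a time, without the caller having to sleep.  A task is
 * put on ifq_task_list under ifq_task_mtx; the first CPU to find
 * ifq_serializer clear claims the queue and drains the list, copying
 * each task and running it with the mutex released.  CPUs that arrive
 * while a serializer is active just leave their task on the list and
 * return, so the start routine is never run concurrently.
 */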
void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

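/*
 * ifq_start() implements transmit mitigation.  If at least
 * min(if_txmit, ifq_maxlen) packets are queued, the start routine is
 * run immediately via the serializer; otherwise the ifq_bundle task is
 * scheduled on the softnet taskq so several small enqueues can be
 * coalesced into a single call to the driver's start routine.
 */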
void
ifq_start(struct ifqueue *ifq)
{
	if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) {
		task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
		ifq_run_start(ifq);
	} else
		task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
}

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_bundle_task(void *p)
{
	struct ifqueue *ifq = p;

	ifq_run_start(ifq);
}

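/*
 * ifq_barrier() makes sure work already queued on the serializer has
 * finished.  It cancels any pending bundle task; if the serializer is
 * idle there is nothing to wait for, otherwise it queues a task that
 * signals a condition variable and sleeps until that task has run.
 */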
void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	task_del(ifq->ifq_softnet, &ifq->ifq_bundle);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */

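/*
 * ifq_init() wires a transmit queue to its interface and softnet
 * taskq, installs priq as the default discipline, and falls back to
 * IFQ_MAXLEN if the driver has not set a queue depth of its own.
 */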
void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softnet = net_tq(ifp->if_index); /* + idx */
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;
	task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;
}

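/*
 * ifq_attach() swaps the queue discipline at runtime.  The new
 * discipline's state is allocated up front, the old queue is purged
 * under ifq_mtx, the ops and state pointers are switched, and the
 * purged packets are fed back through the new enqueue op.  Packets the
 * new discipline refuses are counted as drops and freed, and the old
 * discipline's state is released once the mutex has been dropped.
 */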
void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

	NET_ASSERT_UNLOCKED();
	if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle))
		taskq_barrier(ifq->ifq_softnet);

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

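/*
 * ifq_enqueue() hands a packet to the current discipline.  The enqueue
 * op returns NULL when the packet was queued, the packet itself when
 * it was refused, or a different mbuf when the discipline dropped a
 * lower priority packet to make room.  Stats are only charged when the
 * caller's packet was accepted, and ENOBUFS is returned when it was
 * not.
 */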
int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}

static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

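/*
 * ifq_deq_begin() peeks at the head packet while holding ifq_mtx and
 * stashes the discipline's cookie in ph_cookie.  ifq_deq_commit()
 * removes that packet and drops the mutex; ifq_deq_rollback() leaves
 * the packet on the queue and just drops the mutex.  This lets a
 * driver avoid requeueing when it runs out of transmit descriptors.
 * A rough sketch of such a start routine (the names in caps are
 * placeholders, not part of this API):
 *
 *	while ((m = ifq_deq_begin(ifq)) != NULL) {
 *		if (TX_RING_IS_FULL(sc)) {
 *			ifq_deq_rollback(ifq, m);
 *			ifq_set_oactive(ifq);
 *			break;
 *		}
 *		ENCODE_PACKET_INTO_RING(sc, m);
 *		ifq_deq_commit(ifq, m);
 *	}
 */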
struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

int
ifq_hdatalen(struct ifqueue *ifq)
{
	struct mbuf *m;
	int len = 0;

	m = ifq_deq_begin(ifq);
	if (m != NULL) {
		len = m->m_pkthdr.len;
		/* only peeking at the head packet, so roll back */
		ifq_deq_rollback(ifq, m);
	}

	return (len);
}

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

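/*
 * ifq_mfreem() and ifq_mfreeml() are for queue disciplines that need
 * to drop packets while ifq_mtx is already held.  The mbufs are parked
 * on ifq_free and only released by ifq_deq_leave() after the mutex has
 * been dropped.
 */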
void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * ifiq
 */

static void	ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);
	ifiq->ifiq_pressure = 0;

	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
	NET_ASSERT_UNLOCKED();
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

unsigned int ifiq_maxlen_drop = 2048 * 5;
unsigned int ifiq_maxlen_return = 2048 * 3;

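/*
 * ifiq_input() takes a list of packets from a driver's receive path.
 * Each mbuf is tagged with the interface index and rdomain and offered
 * to bpf, then the list is appended to ifiq_ml and ifiq_process is
 * scheduled on the softnet taskq.  If the backlog is already past
 * ifiq_maxlen_drop the whole list is dropped instead.  The return
 * value is nonzero once the backlog exceeds ifiq_maxlen_return, which
 * callers can treat as a backpressure signal.
 */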
int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
	unsigned int len;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN))
				m_freem(m);
			else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;

	len = ml_len(&ifiq->ifiq_ml);
	if (len > ifiq_maxlen_drop)
		ifiq->ifiq_qdrops += ml_len(ml);
	else
		ml_enlist(&ifiq->ifiq_ml, ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (len > ifiq_maxlen_return);
}

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	mtx_enter(&ifiq->ifiq_mtx);
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

/*
 * priq implementation
 */

unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

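/*
 * priq_enq() keeps one mbuf_list per priority.  When the queue is full
 * it tries to make room by dropping a packet from a strictly lower
 * priority list; if there is none, the new packet itself is rejected
 * and handed back to the caller.
 */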
struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

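/*
 * Dequeue walks the lists from the highest priority down and returns
 * the first packet it finds; the list it came from is passed back as
 * the commit cookie so priq_deq_commit() can pop it cheaply.
 */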
struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}