/*	$OpenBSD: ifq.c,v 1.20 2018/01/02 07:08:10 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;
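
/*
 * The ifq_ops vector is the hook a queueing discipline offers to the
 * ifqueue machinery below: ifqop_idx() maps an mbuf to one of the
 * interface's transmit queues, ifqop_enq() returns NULL on success, the
 * given mbuf if it was rejected, or a different mbuf that was dropped to
 * make room; ifqop_deq_begin()/ifqop_deq_commit() peek at and then remove
 * the head of the queue; ifqop_purge() drains everything into an
 * mbuf_list; ifqop_alloc()/ifqop_free() manage the per-queue state.
 * Alternative disciplines export their ops the same way ifq_priq_ops is
 * exported above and are swapped in at runtime with ifq_attach() below.
 */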

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);
void	ifq_bundle_task(void *);

#define TASK_ONQUEUE 0x1

static inline void
ifq_run_start(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_start);
}

void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}
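
/*
 * ifq_serialize() guarantees that a task is queued at most once at a time
 * and that only one CPU runs tasks for a given ifq at any moment.  A task
 * already marked TASK_ONQUEUE is left alone; otherwise it is appended to
 * ifq_task_list under ifq_task_mtx.  Whichever CPU finds ifq_serializer
 * NULL claims it and drains the list, copying each task to the stack and
 * dropping the mutex around the actual call so the task structure may
 * immediately be requeued or reused by its owner.  Tasks therefore run in
 * the context of whichever CPU claimed the serializer, not necessarily
 * the one that queued them.
 */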

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

void
ifq_start(struct ifqueue *ifq)
{
	if (ifq_len(ifq) >= min(4, ifq->ifq_maxlen)) {
		task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
		ifq_run_start(ifq);
	} else
		task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
}
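
/*
 * ifq_start() mitigates calls into the driver: while the backlog is short
 * (fewer than min(4, ifq_maxlen) packets) the start is deferred to the
 * ifq_bundle task on the softnet taskq so several packets can be batched
 * into one call; once the backlog is long enough, any pending bundle is
 * cancelled and the start routine is run immediately via the serialiser.
 */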

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_bundle_task(void *p)
{
	struct ifqueue *ifq = p;

	ifq_run_start(ifq);
}

void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle)) {
		int netlocked = (rw_status(&netlock) == RW_WRITE);

		if (netlocked) /* XXXSMP breaks atomicity */
			NET_UNLOCK();

		taskq_barrier(ifq->ifq_softnet);

		if (netlocked)
			NET_LOCK();
	}

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}
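
/*
 * ifq_barrier() is how a driver makes sure no transmit work is still
 * running against the ifq before tearing its state down.  A minimal
 * sketch of a hypothetical driver's down path (names are illustrative,
 * not part of this file):
 *
 *	CLR(ifp->if_flags, IFF_RUNNING);
 *	ifq_barrier(&ifp->if_snd);
 *	ifq_clr_oactive(&ifp->if_snd);
 *	... free transmit descriptors ...
 *
 * Clearing IFF_RUNNING first matters because ifq_start_task() checks it
 * and becomes a no-op once the interface is going down.
 */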

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */

void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softnet = net_tq(ifp->if_index);
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;
	task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;
}

void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}
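
/*
 * ifq_attach() switches an ifq to a different queueing discipline while
 * traffic is flowing: the old queue is purged into a local list with the
 * mutex held, the new ops and state are installed, and the backlog is
 * re-enqueued through the new discipline (anything it refuses is counted
 * as a drop and freed once the mutex is released).  A sketch, assuming a
 * discipline that exports example_ops/example_arg the same way priq
 * exports ifq_priq_ops above (hypothetical names):
 *
 *	ifq_attach(&ifp->if_snd, example_ops, example_arg);
 */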

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

	ifq_barrier(ifq); /* ensure nothing is running with the ifq */

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}
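
/*
 * The return value of ifqop_enq() drives the accounting above: NULL means
 * the mbuf was queued, the mbuf itself means it was rejected (the caller
 * gets ENOBUFS and the packet counts as a drop), and any other mbuf means
 * the new packet was queued but an older one (for priq, one of lower
 * priority) was dropped to make room, so the queue length stays the same
 * while the drop counter goes up.
 */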

static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}
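
/*
 * ifq_deq_begin()/ifq_deq_commit()/ifq_deq_rollback() let a driver look
 * at the head of the queue before irrevocably taking it, which is useful
 * when the transmit ring might not have room.  The discipline's dequeue
 * cookie is stashed in m->m_pkthdr.ph_cookie by ifq_deq_begin() so commit
 * does not need it passed back.  A minimal sketch of a hypothetical
 * MP-safe start routine (sc, sc_tx_free and example_encap() are
 * illustrative, not part of this API):
 *
 *	void
 *	example_start(struct ifqueue *ifq)
 *	{
 *		struct example_softc *sc = ifq->ifq_if->if_softc;
 *		struct mbuf *m;
 *
 *		for (;;) {
 *			if (sc->sc_tx_free == 0) {
 *				ifq_set_oactive(ifq);
 *				break;
 *			}
 *
 *			m = ifq_deq_begin(ifq);
 *			if (m == NULL)
 *				break;
 *
 *			if (example_encap(sc, m) != 0) {
 *				ifq_deq_rollback(ifq, m);
 *				ifq_set_oactive(ifq);
 *				break;
 *			}
 *
 *			ifq_deq_commit(ifq, m);
 *		}
 *	}
 *
 * A driver that can always consume a packet once it has seen it can use
 * ifq_dequeue() instead.  When transmit completions free up descriptors,
 * the completion path clears the oactive state and reschedules the start
 * routine (see ifq_restart_task() above).
 */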

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}
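
/*
 * ifq_mfreem() and ifq_mfreeml() exist for code that runs with ifq_mtx
 * already held (the discipline ops called via ifq_deq_begin(), for
 * example): instead of calling m_freem() under the mutex, the mbufs are
 * parked on ifq_free and actually freed by ifq_deq_leave() once the
 * mutex has been dropped.
 */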

/*
 * ifiq
 */

static void	ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);

	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task)) {
		int netlocked = (rw_status(&netlock) == RW_WRITE);

		if (netlocked) /* XXXSMP breaks atomicity */
			NET_UNLOCK();

		taskq_barrier(ifiq->ifiq_softnet);

		if (netlocked)
			NET_LOCK();
	}

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml, unsigned int cwm)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif
	int rv = 1;

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN))
				m_freem(m);
			else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;

	if (ifiq_len(ifiq) >= cwm * 5)
		ifiq->ifiq_qdrops += ml_len(ml);
	else {
		rv = (ifiq_len(ifiq) >= cwm * 3);
		ml_enlist(&ifiq->ifiq_ml, ml);
	}
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (rv);
}
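
/*
 * ifiq_input() is the receive-side mitigation point.  Packet and byte
 * counters are always updated.  Packets that bpf claims (bpf_mtap_ether()
 * returning nonzero) are freed here; if that empties the list, only the
 * counters change.  Otherwise the list is either appended to the ifiq
 * backlog and ifiq_process() is scheduled on the softnet taskq, or, if
 * the backlog already holds 5 * cwm packets, dropped wholesale once the
 * mutex is released.  The return value becomes 1 once the backlog reaches
 * 3 * cwm, presumably so the caller can throttle how hard it pushes the
 * receive ring.
 */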

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

void
ifiq_barrier(struct ifiqueue *ifiq)
{
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	mtx_enter(&ifiq->ifiq_mtx);
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

/*
 * priq implementation
 */

unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	return (flow % nqueues);
}
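
/*
 * priq_idx() only spreads traffic across the interface's transmit
 * queues: when the stack has stamped a valid flow id on the packet it is
 * reduced modulo the number of queues, and everything else lands on
 * queue 0.  The priorities themselves are handled per queue, below.
 */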

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}
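
/*
 * The drop policy above only ever sacrifices strictly lower priorities.
 * For example, if the ifq is at ifq_maxlen and a pf.prio 6 packet
 * arrives, the head of the lowest non-empty list below priority 6 is
 * dequeued and handed back to ifq_enqueue() to be counted and freed,
 * while the new packet is queued; a priority 0 arrival on a full queue
 * finds nothing to displace and is returned (and dropped) instead.
 */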

struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}