/*	$OpenBSD: ifq.c,v 1.29 2019/03/29 04:12:55 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;
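
/*
 * The initializers above are positional and must stay in the member
 * order of struct ifq_ops (net/ifq.h): ifqop_idx, ifqop_enq,
 * ifqop_deq_begin, ifqop_deq_commit, ifqop_purge, ifqop_alloc and
 * ifqop_free.  Alternative disciplines swap in their own ifq_ops via
 * ifq_attach() below.
 */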

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);

void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}
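
/*
 * Only one CPU runs tasks at a time, but it is whichever CPU got there
 * first rather than a dedicated thread: if CPU A is inside the dispatch
 * loop above when CPU B calls ifq_serialize(), B merely appends its
 * task and returns, and A ends up executing it before clearing
 * ifq_serializer.
 */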

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}
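
/*
 * ifq_barrier() rides the serialiser: once the barrier task has run,
 * every task queued before it is guaranteed to have run as well.  A
 * sketch of typical use in a hypothetical driver's down path (not code
 * from this file):
 *
 *	ifq_barrier(&ifp->if_snd);
 *	ifq_clr_oactive(&ifp->if_snd);
 *
 * after which no CPU is still inside the driver's if_qstart routine.
 */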

/*
 * ifqueue mbuf queue API
 */

void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;
}

void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}
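
/*
 * ifq_attach() is how an alternative discipline (hfsc, for example)
 * replaces priq on a live interface: the old queue is purged into a
 * local list, the ops and queue pointers are swapped, and every packet
 * is re-offered to the new discipline, which may reject some (counted
 * as qdrops).  ifqop_alloc and ifqop_free run outside ifq_mtx, so they
 * are allowed to sleep.
 */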

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}
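
/*
 * The ifqop_enq convention drives the accounting above: NULL means the
 * packet was queued, the offered mbuf itself means it was rejected
 * (hence ENOBUFS), and any other mbuf is a lower priority packet that
 * was evicted to make room, which still counts as a successful enqueue
 * plus a qdrop.
 */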

static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}
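
/*
 * The begin/commit/rollback triplet lets a driver peek at the next
 * packet before committing transmit ring resources to it.  A minimal
 * sketch, assuming a hypothetical drv_encap() that fails when the ring
 * is full (not code from this file):
 *
 *	while ((m = ifq_deq_begin(ifq)) != NULL) {
 *		if (drv_encap(sc, m) != 0) {
 *			ifq_deq_rollback(ifq, m);
 *			ifq_set_oactive(ifq);
 *			break;
 *		}
 *		ifq_deq_commit(ifq, m);
 *	}
 */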

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

int
ifq_hdatalen(struct ifqueue *ifq)
{
	struct mbuf *m;
	int len = 0;

	/* peek at the packet at the head of the queue without consuming it */
	m = ifq_deq_begin(ifq);
	if (m != NULL) {
		len = m->m_pkthdr.len;
		ifq_deq_rollback(ifq, m);
	}

	return (len);
}

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}
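
/*
 * ifq_mfreem() and ifq_mfreeml() exist for queue disciplines that drop
 * packets while ifq_mtx is already held: instead of calling m_freem()
 * under the mutex, the mbufs are parked on ifq_free and purged by
 * ifq_deq_leave() after the mutex has been released, keeping the free
 * out of the critical section.
 */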

/*
 * ifiq
 */

static void	ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);
	ifiq->ifiq_pressure = 0;

	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task)) {
		NET_ASSERT_UNLOCKED();
		taskq_barrier(ifiq->ifiq_softnet);
	}

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

unsigned int ifiq_maxlen_drop = 2048 * 5;
unsigned int ifiq_maxlen_return = 2048 * 3;

int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
	unsigned int len;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN))
				m_freem(m);
			else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;

	len = ml_len(&ifiq->ifiq_ml);
	if (len > ifiq_maxlen_drop)
		ifiq->ifiq_qdrops += ml_len(ml);
	else
		ml_enlist(&ifiq->ifiq_ml, ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (len > ifiq_maxlen_return);
}
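
/*
 * The return value of ifiq_input() is a backpressure hint: it goes
 * non-zero once the pending backlog exceeds ifiq_maxlen_return, which
 * a driver can feed into its rx ring accounting (for example, to
 * shrink the ring when the stack is falling behind).  Past the larger
 * ifiq_maxlen_drop threshold the incoming list is dropped outright and
 * counted as qdrops.
 */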

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	mtx_enter(&ifiq->ifiq_mtx);
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

/*
 * priq implementation
 */

unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}
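
/*
 * A worked example of the drop policy above, with the queue already at
 * ifq_maxlen: a new prio 3 packet evicts the head of the first
 * non-empty list at prio 0-2 and is queued in its place, while a new
 * prio 0 packet finds no lower priority list to raid and is handed
 * straight back to the caller unqueued.
 */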

struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}