/*	$OpenBSD: ifq.c,v 1.30 2019/03/29 04:21:55 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);

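/*
 * ifq_serialize() runs the tasks on ifq_task_list one at a time, in
 * order, without a dedicated thread: whichever CPU finds
 * ifq_serializer idle claims it and drains the list, while other CPUs
 * just queue their task and return. The task is copied to the stack
 * before the mutex is released, so the caller may reuse the task
 * structure as soon as it has run.
 */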
void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

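/*
 * Example (a sketch, not taken from a real driver): code tearing an
 * interface down can use ifq_barrier() to wait for a currently running
 * if_qstart routine to finish before freeing resources it may touch:
 *
 *	ifq_clr_oactive(&ifp->if_snd);
 *	ifq_barrier(&ifp->if_snd);	// drain the serialiser
 *
 * ifq_barrier() queues ifq_barrier_task() behind whatever is already
 * on the serialiser and sleeps on the cond until the task signals it.
 */
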
/*
 * ifqueue mbuf queue API
 */

void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;
}

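/*
 * ifq_attach() swaps the queueing discipline at runtime (e.g. when pf
 * activates a queue configuration and replaces priq with another set
 * of ifq_ops). The old queue is purged into a local mbuf_list, the new
 * discipline is installed, and the packets are re-enqueued through it;
 * anything the new discipline rejects is counted as a qdrop and freed
 * after the ifq mutex has been released.
 */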
void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

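/*
 * ifqop_enq() has three possible results: NULL means the packet was
 * queued, the packet itself means it was rejected outright, and any
 * other mbuf is a lower priority victim that was evicted to make room.
 * ifq_enqueue() turns those into counter updates and ENOBUFS, and
 * frees the dropped mbuf outside the ifq mutex.
 */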
int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}

static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

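/*
 * The begin/commit/rollback split lets a driver look at the head
 * packet while the ifq mutex is held: ifq_deq_begin() returns with the
 * mutex taken, and commit or rollback releases it. A start routine
 * might use it like this sketch (ring_is_full() is a hypothetical
 * driver helper, not part of this API):
 *
 *	while ((m = ifq_deq_begin(ifq)) != NULL) {
 *		if (ring_is_full(sc)) {
 *			ifq_deq_rollback(ifq, m);
 *			ifq_set_oactive(ifq);
 *			break;
 *		}
 *		ifq_deq_commit(ifq, m);
 *		// load m into the tx ring...
 *	}
 */
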
struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

int
ifq_hdatalen(struct ifqueue *ifq)
{
	struct mbuf *m;
	int len = 0;

	m = ifq_deq_begin(ifq);
	if (m != NULL) {
		len = m->m_pkthdr.len;
		/* only peeking at the head packet, so put it back */
		ifq_deq_rollback(ifq, m);
	}

	return (len);
}

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * ifiq
 */

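/*
 * An ifiq is the receive side counterpart of an ifqueue: drivers feed
 * mbuf_lists into it via ifiq_input(), and ifiq_process() later hands
 * the backlog to the stack from a softnet taskq.
 */
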
static void	ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);
	ifiq->ifiq_pressure = 0;

	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
	NET_ASSERT_UNLOCKED();
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

unsigned int ifiq_maxlen_drop = 2048 * 5;
unsigned int ifiq_maxlen_return = 2048 * 3;

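/*
 * ifiq_input() returns nonzero once the backlog has grown past
 * ifiq_maxlen_return, which a driver can use as a hint to slow its rx
 * ring down (e.g. by leaving interrupts masked a little longer); once
 * the backlog exceeds ifiq_maxlen_drop the whole incoming list is
 * dropped instead of queued.
 */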
int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
	unsigned int len;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN))
				m_freem(m);
			else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;

	len = ml_len(&ifiq->ifiq_ml);
	if (len > ifiq_maxlen_drop)
		ifiq->ifiq_qdrops += ml_len(ml);
	else
		ml_enlist(&ifiq->ifiq_ml, ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (len > ifiq_maxlen_return);
}

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	mtx_enter(&ifiq->ifiq_mtx);
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

/*
 * priq implementation
 */

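/*
 * priq keeps IFQ_NQUEUES simple FIFO lists, one per packet priority.
 * Dequeue always scans from the highest priority list down; when the
 * ifq is full, enqueue tries to evict a packet of strictly lower
 * priority to make room, and rejects the new packet if it cannot.
 */
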
unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}