/*	$OpenBSD: ifq.c,v 1.22 2018/01/25 14:04:36 mpi Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;
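
/*
 * An ifq_ops vector is the hook a queueing discipline provides:
 * ifqop_idx maps a packet to one of the interface's queues,
 * ifqop_enq returns an mbuf for the caller to account and free (the
 * packet itself if it could not be queued, or a victim dropped to
 * make room), and ifqop_deq_begin/ifqop_deq_commit split dequeue in
 * two so a caller can inspect a packet before taking it.
 */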

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);

#define TASK_ONQUEUE 0x1

void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}
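
/*
 * ifq_start() and ifq_restart() in ifq.h are thin wrappers that pass
 * the tasks prepared in ifq_init() below to ifq_serialize(), roughly:
 *
 *	ifq_serialize(ifq, &ifq->ifq_start);
 *
 * The first CPU to find ifq_serializer NULL claims it and runs every
 * queued task before clearing it again; other CPUs just leave their
 * task on the list and return, so a task runs exactly once no matter
 * how many CPUs post it concurrently.
 */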

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}
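
/*
 * A sketch of the intended use of ifq_barrier(): a driver stopping an
 * interface waits for a start routine running on another CPU to get
 * out of the way before tearing down its transmit ring (the teardown
 * step is hypothetical):
 *
 *	ifq_clr_oactive(&ifp->if_snd);
 *	ifq_barrier(&ifp->if_snd);
 *	... now safe to free the tx descriptors ...
 */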

/*
 * ifqueue mbuf queue API
 */

void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);
	ifq->ifq_qdrops = 0;

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;
}
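
/*
 * Note that ifq_init() only falls back to IFQ_MAXLEN if the driver
 * has not already chosen a depth, so a sketch of a typical attach
 * path (the descriptor count is hypothetical) is:
 *
 *	ifq_set_maxlen(&ifp->if_snd, sc->sc_ntxdesc - 1);
 *	if_attach(ifp);
 */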

void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}
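
/*
 * ifq_attach() is how an alternate discipline replaces the default
 * priq on a live interface: the old queue is purged and its packets
 * re-enqueued through the new ops, with anything that no longer fits
 * dropped.  A sketch, assuming hfsc's exported ops vector and a
 * hypothetical private state pointer as the arguments:
 *
 *	ifq_attach(&ifp->if_snd, ifq_hfsc_ops, hif);
 */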

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}
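
/*
 * The usual producer is the output path, which picks a transmit
 * queue, enqueues, and kicks the start routine; simplified, it is
 * roughly:
 *
 *	error = ifq_enqueue(ifq, m);
 *	if (error)
 *		return (error);
 *	ifq_start(ifq);
 */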

static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}
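
/*
 * A sketch of an MP-safe start routine built on the two-stage
 * dequeue: peek with ifq_deq_begin(), roll back if the hardware has
 * no room, and commit only once the packet is owned by the chip
 * (the ring test and DMA step are hypothetical):
 *
 *	while ((m = ifq_deq_begin(ifq)) != NULL) {
 *		if (ring_full(sc)) {
 *			ifq_deq_rollback(ifq, m);
 *			ifq_set_oactive(ifq);
 *			break;
 *		}
 *		ifq_deq_commit(ifq, m);
 *		... map m for DMA and post it ...
 *	}
 */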

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}
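
/*
 * ifq_q_enter()/ifq_q_leave() let discipline code reach its private
 * state only while its ops are still attached, e.g. with the priq
 * ops exported above:
 *
 *	struct priq *pq;
 *
 *	if ((pq = ifq_q_enter(ifq, ifq_priq_ops)) == NULL)
 *		return;
 *	... inspect pq->pq_lists ...
 *	ifq_q_leave(ifq, pq);
 */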

void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}
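
/*
 * Both of the above run with ifq_mtx held (typically from within
 * discipline ops), so the mbufs are not freed here: they are parked
 * on ifq_free and released by ifq_deq_leave() once the mutex has
 * been dropped.
 */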

/*
 * ifiq
 */

static void	ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);

	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task)) {
		NET_ASSERT_UNLOCKED();
		taskq_barrier(ifiq->ifiq_softnet);
	}

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml, unsigned int cwm)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif
	int rv = 1;

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN))
				m_freem(m);
			else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;

	if (ifiq_len(ifiq) >= cwm * 5)
		ifiq->ifiq_qdrops += ml_len(ml);
	else {
		rv = (ifiq_len(ifiq) >= cwm * 3);
		ml_enlist(&ifiq->ifiq_ml, ml);
	}
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (rv);
}
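
/*
 * A sketch of the receive side that feeds this: a driver's interrupt
 * handler collects packets on a list and hands them up, after which
 * the stack ends up in ifiq_input() (driver names are hypothetical):
 *
 *	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
 *
 *	while ((m = rxeof(sc)) != NULL)
 *		ml_enqueue(&ml, m);
 *	if_input(ifp, &ml);
 *
 * The return value above signals backpressure: 1 tells the caller
 * the queue is filling so rx ring moderation can back off.
 */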

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

void
ifiq_barrier(struct ifiqueue *ifiq)
{
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	mtx_enter(&ifiq->ifiq_mtx);
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}
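
/*
 * ifiq_process() runs on the softnet taskq; it snapshots the list
 * under the mutex so the driver can keep queueing new packets while
 * the stack chews on the previous batch.
 */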

/*
 * priq implementation
 */

unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}
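
/*
 * A worked example of the drop policy above: with the queue full and
 * prio 0 packets waiting, enqueueing a prio 5 packet dequeues a
 * prio 0 victim and returns it for the caller to free; enqueueing
 * another prio 0 packet finds no lower priority queue to raid, so
 * the packet itself is returned and counted as a drop by
 * ifq_enqueue().
 */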

struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}