/*	$OpenBSD: ifq.c,v 1.21 2018/01/04 11:02:57 tb Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

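/*
 * This table glues priq into the generic ifq_ops interface: queue
 * index selection, enqueue, two stage dequeue (begin/commit), purge,
 * and allocation/free of the per-ifqueue state.  ifq_priq_ops below
 * exports it as the default transmit queue discipline.
 */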
const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

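/*
 * priq state is simply one mbuf_list per priority level; the lists
 * are indexed by m->m_pkthdr.pf.prio in priq_enq() below, so
 * IFQ_NQUEUES is expected to cover the 0..IFQ_MAXPRIO range.
 */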
struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);

#define TASK_ONQUEUE 0x1

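/*
 * ifq_serialize() queues a task against the ifq and guarantees that
 * no two tasks for the same ifq ever run concurrently.  The first
 * CPU to find ifq_serializer unclaimed takes on the serializer role
 * and drains the task list, dropping ifq_task_mtx around each call;
 * CPUs that arrive while it is running only append their task and
 * return.  TASK_ONQUEUE stops a task from being queued twice, and
 * each task is copied to the stack before it runs so the caller's
 * task structure may be reused as soon as the mutex is released.
 */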
void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

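/*
 * ifq_start_task() and ifq_restart_task() are the bodies of the
 * ifq_start and ifq_restart tasks set up in ifq_init().  Both end up
 * calling the driver's if_qstart routine; the restart variant clears
 * the oactive state first.  They are presumably dispatched through
 * ifq_serialize() by start/restart helpers outside this file, which
 * is what keeps if_qstart from running concurrently with itself.
 */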
void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

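/*
 * ifq_barrier() waits for everything currently queued on the
 * serializer to finish: it queues a task that signals a condition
 * variable and sleeps until that task has run.  If nothing is being
 * serialized it returns immediately.
 */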
void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */

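/*
 * ifq_init() wires an ifqueue up to its interface: the transmit
 * queue defaults to the priq discipline, the serializer state is
 * reset, and the start/restart tasks are bound to this ifq.  A
 * driver that has not set its own ifq_maxlen gets IFQ_MAXLEN.
 */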
void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);
	ifq->ifq_qdrops = 0;

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;
}

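/*
 * ifq_attach() switches an ifqueue to a different discipline.  The
 * new discipline's state is allocated before the mutex is taken, the
 * old discipline is purged under the mutex, and the purged packets
 * are re-enqueued into the new discipline; anything the new
 * discipline refuses is counted as a drop and freed once the mutex
 * has been released.
 */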
void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

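/*
 * ifq_enqueue() hands an mbuf to the current discipline.  The
 * ifqop_enq hook returns NULL when the packet was queued with
 * nothing dropped, the packet itself when it was rejected, or a
 * different mbuf when the new packet was queued at the cost of
 * dropping an old one.  The counters are updated accordingly and
 * the dropped mbuf, if any, is freed outside the mutex.
 */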
int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}

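/*
 * Dequeue protocol.  ifq_deq_begin() peeks at the next packet under
 * ifq_mtx and stashes the discipline's cookie in ph_cookie so that
 * ifq_deq_commit() can later remove exactly that packet;
 * ifq_deq_rollback() just releases the lock and leaves the packet on
 * the queue.  ifq_deq_leave() also frees, outside the mutex, any
 * mbufs parked on ifq_free by ifq_mfreem()/ifq_mfreeml() while the
 * lock was held.
 */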
static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}
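/*
 * A minimal sketch of how a driver start routine might consume the
 * queue with the begin/commit/rollback protocol (the xxx_encap() and
 * softc names are hypothetical, not part of this file):
 *
 *	for (;;) {
 *		m = ifq_deq_begin(ifq);
 *		if (m == NULL)
 *			break;
 *		if (xxx_encap(sc, m) != 0) {
 *			ifq_deq_rollback(ifq, m);
 *			ifq_set_oactive(ifq);
 *			break;
 *		}
 *		ifq_deq_commit(ifq, m);
 *	}
 *
 * Drivers that do not need to peek before taking ownership can just
 * call ifq_dequeue().
 */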

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

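/*
 * ifq_mfreem() and ifq_mfreeml() let code that already holds ifq_mtx
 * (a discipline, or a caller inside the dequeue protocol) drop
 * packets: the mbufs are parked on ifq_free and only really freed
 * once the mutex is released in ifq_deq_leave().
 */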
void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * ifiq
 */

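/*
 * An ifiqueue is the receive side counterpart of the ifqueue above:
 * received packets are accumulated on ifiq_ml and pushed into the
 * stack by ifiq_process() running on the softnet taskq instead of in
 * the caller's context.
 */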
static void	ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);

	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task)) {
		int netlocked = (rw_status(&netlock) == RW_WRITE);

		if (netlocked) /* XXXSMP breaks atomicity */
			NET_UNLOCK();

		taskq_barrier(ifiq->ifiq_softnet);

		if (netlocked)
			NET_LOCK();
	}

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

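/*
 * ifiq_input() takes a list of received packets, stamps each one
 * with the interface index and rdomain, runs the bpf taps, and
 * appends whatever survives to the ifiq backlog before scheduling
 * ifiq_process() on the softnet taskq.  cwm acts as a watermark:
 * once the backlog reaches five times cwm the whole list is dropped,
 * and the function returns nonzero from three times cwm upwards so
 * the caller can back off.
 */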
int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml, unsigned int cwm)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif
	int rv = 1;

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN))
				m_freem(m);
			else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;

	if (ifiq_len(ifiq) >= cwm * 5)
		ifiq->ifiq_qdrops += ml_len(ml);
	else {
		rv = (ifiq_len(ifiq) >= cwm * 3);
		ml_enlist(&ifiq->ifiq_ml, ml);
	}
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (rv);
}

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

void
ifiq_barrier(struct ifiqueue *ifiq)
{
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	mtx_enter(&ifiq->ifiq_mtx);
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

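/*
 * ifiq_process() runs on the softnet taskq: it detaches the current
 * backlog from the ifiq under the mutex and feeds it to
 * if_input_process().
 */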
static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

/*
 * priq implementation
 */

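/*
 * priq_idx() picks which of the interface's nqueues ifqueues a
 * packet is assigned to, using the mbuf's flow id when
 * M_FLOWID_VALID is set; everything without a flow id maps to
 * queue 0.
 */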
unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

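/*
 * Dequeue side of priq: scan from the highest priority list down,
 * return the head of the first non-empty list, and remember that
 * list in the cookie so priq_deq_commit() can pop exactly that mbuf.
 */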
struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}