/*	$OpenBSD: ifq.c,v 1.18 2017/12/15 01:37:30 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);

#define TASK_ONQUEUE 0x1

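/*
 * ifq_serialize() queues a task on the ifq and lets exactly one CPU at
 * a time drain the task list: whoever finds ifq_serializer idle claims
 * it and runs every pending task before releasing it again.  Each task
 * is copied to the caller's stack before the mutex is dropped so the
 * original task structure can be requeued while the copy is running.
 */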
void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

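/*
 * ifq_barrier() pushes a task through the serializer and sleeps until
 * it has run, which guarantees that any work queued before the barrier
 * (including an if_qstart currently running on another CPU) has
 * finished by the time this returns.
 */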
void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	/* this should only be called from converted drivers */
	KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE));

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */

void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);
	ifq->ifq_qdrops = 0;

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;
}

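/*
 * ifq_attach() switches the ifq to a new queueing discipline.  Packets
 * held by the old discipline are purged and re-enqueued on the new
 * one; anything the new discipline refuses is counted as a qdrop and
 * freed once the ifq mutex has been released.
 */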
void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

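/*
 * ifqop_enq returns NULL when the mbuf was accepted, the mbuf itself
 * when it was rejected, or a different mbuf when the discipline made
 * room by dropping a lower priority packet.  ifq_enqueue() turns that
 * into counter updates, a free of the dropped packet, and ENOBUFS for
 * the rejected case.
 */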
int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}

static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

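/*
 * The dequeue API is split so a driver can look at the head of the
 * queue, try to map it for DMA, and then either take the packet or put
 * it back.  The ifq mutex is held between ifq_deq_begin() and the
 * matching ifq_deq_commit() or ifq_deq_rollback(); the discipline's
 * cookie rides along in m_pkthdr.ph_cookie in the meantime.
 *
 * A sketch of how a hypothetical MP-safe driver's start routine might
 * use this (mydrv_encap() is made up for illustration):
 *
 *	void
 *	mydrv_start(struct ifqueue *ifq)
 *	{
 *		struct mbuf *m;
 *
 *		for (;;) {
 *			m = ifq_deq_begin(ifq);
 *			if (m == NULL)
 *				return;
 *
 *			if (mydrv_encap(ifq->ifq_softc, m) != 0) {
 *				// out of descriptors: put it back
 *				ifq_deq_rollback(ifq, m);
 *				ifq_set_oactive(ifq);
 *				return;
 *			}
 *
 *			ifq_deq_commit(ifq, m);
 *		}
 *	}
 */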
struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

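/*
 * ifq_mfreem() and ifq_mfreeml() let a discipline drop packets while
 * the ifq mutex is already held: the mbufs are parked on ifq_free and
 * only freed in ifq_deq_leave(), after the mutex has been released.
 */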
void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * ifiq
 */

static void	ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(ifp->if_index); /* + idx */
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);

	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task)) {
		int netlocked = (rw_status(&netlock) == RW_WRITE);

		if (netlocked) /* XXXSMP breaks atomicity */
			NET_UNLOCK();

		taskq_barrier(ifiq->ifiq_softnet);

		if (netlocked)
			NET_LOCK();
	}

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

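/*
 * ifiq_input() takes a list of packets from a driver's rx path, tags
 * them with the interface and rdomain, offers them to bpf, and queues
 * them for the softnet taskq.  cwm is the driver's current rx ring
 * watermark: the return value asks the driver to back off once the
 * backlog passes 3 * cwm, and the whole list is dropped outright past
 * 5 * cwm.
 */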
int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml, unsigned int cwm)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif
	int rv = 1;

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_IN))
				m_freem(m);
			else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;

	if (ifiq_len(ifiq) >= cwm * 5)
		ifiq->ifiq_qdrops += ml_len(ml);
	else {
		rv = (ifiq_len(ifiq) >= cwm * 3);
		ml_enlist(&ifiq->ifiq_ml, ml);
	}
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (rv);
}

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

void
ifiq_barrier(struct ifiqueue *ifiq)
{
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	mtx_enter(&ifiq->ifiq_mtx);
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

/*
 * priq implementation
 */

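/*
 * priq keeps one mbuf_list per priority level (IFQ_NQUEUES of them,
 * indexed by m_pkthdr.pf.prio).  Dequeue scans from the highest
 * priority list down; when the ifq is full, enqueue makes room by
 * dropping from the lowest priority non-empty list below the new
 * packet, otherwise the new packet itself is rejected.
 */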
unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}