/*	$OpenBSD: ifq.c,v 1.13 2017/11/14 00:00:35 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>

#include <net/if.h>
#include <net/if_var.h>

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;
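
/*
 * The ops vector above relies on positional initialisation.  As a
 * readability sketch only, the same table with designated initialisers;
 * the first member name, ifqop_idx, is assumed from if_var.h, the
 * others are the ones dereferenced later in this file:
 *
 *	const struct ifq_ops priq_ops = {
 *		.ifqop_idx	  = priq_idx,
 *		.ifqop_enq	  = priq_enq,
 *		.ifqop_deq_begin  = priq_deq_begin,
 *		.ifqop_deq_commit = priq_deq_commit,
 *		.ifqop_purge	  = priq_purge,
 *		.ifqop_alloc	  = priq_alloc,
 *		.ifqop_free	  = priq_free,
 *	};
 */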

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);
void	ifq_bundle_task(void *);

#define TASK_ONQUEUE 0x1

static inline void
ifq_run_start(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_start);
}

void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}
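
/*
 * Any work that must not run concurrently with the interface start
 * routine can be pushed through the serialiser above.  A minimal
 * sketch, assuming the driver keeps a persistent task in its softc
 * ("exm_kick" and "sc_kick_task" are hypothetical driver names):
 *
 *	task_set(&sc->sc_kick_task, exm_kick, sc);
 *	ifq_serialize(&ifp->if_snd, &sc->sc_kick_task);
 *
 * The task either runs immediately on this CPU or is picked up by
 * whichever CPU currently holds ifq_serializer.
 */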

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

void
ifq_start(struct ifqueue *ifq)
{
	if (ifq_len(ifq) >= min(4, ifq->ifq_maxlen)) {
		task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
		ifq_run_start(ifq);
	} else
		task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
}

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_bundle_task(void *p)
{
	struct ifqueue *ifq = p;

	ifq_run_start(ifq);
}

void
ifq_barrier(struct ifqueue *ifq)
{
	struct sleep_state sls;
	unsigned int notdone = 1;
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &notdone);

	/* this should only be called from converted drivers */
	KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE));

	if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle)) {
		int netlocked = (rw_status(&netlock) == RW_WRITE);

		if (netlocked) /* XXXSMP breaks atomicity */
			NET_UNLOCK();

		taskq_barrier(ifq->ifq_softnet);

		if (netlocked)
			NET_LOCK();
	}

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	while (notdone) {
		sleep_setup(&sls, &notdone, PWAIT, "ifqbar");
		sleep_finish(&sls, notdone);
	}
}

void
ifq_barrier_task(void *p)
{
	unsigned int *notdone = p;

	*notdone = 0;
	wakeup_one(notdone);
}
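
/*
 * Drivers typically call ifq_barrier() while tearing down the transmit
 * side, so nothing is still running inside the serialiser when rings
 * are freed.  A sketch; "exm_stop", "exm_softc", "sc_if" and
 * "exm_free_txring" are hypothetical driver names:
 *
 *	void
 *	exm_stop(struct exm_softc *sc)
 *	{
 *		struct ifnet *ifp = &sc->sc_if;
 *
 *		CLR(ifp->if_flags, IFF_RUNNING);
 *		ifq_barrier(&ifp->if_snd);
 *
 *		exm_free_txring(sc);
 *	}
 *
 * The barrier returns once the bundle task and any serialised start or
 * restart work have drained.
 */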

/*
 * ifqueue mbuf queue API
 */

void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softnet = net_tq(ifp->if_index);
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);
	ifq->ifq_qdrops = 0;

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;
	task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;
}
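
/*
 * ifq_init() runs during interface attach.  The maxlen check above
 * lets a driver pick its transmit queue length before attaching; a
 * sketch, where "EXM_TX_RING_SLOTS" is a hypothetical ring size:
 *
 *	ifq_set_maxlen(&ifp->if_snd, EXM_TX_RING_SLOTS - 1);
 *	if_attach(ifp);
 */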

void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}
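
/*
 * ifq_attach() is how a queueing discipline is swapped on a live
 * interface: the old queue is purged, its packets are re-enqueued on
 * the new discipline, and whatever no longer fits is dropped.  A
 * sketch, where "exm_ops" and "exm_arg" stand in for some other
 * discipline's ifq_ops and allocation argument:
 *
 *	ifq_attach(&ifp->if_snd, &exm_ops, exm_arg);
 *
 * and switching back to the default:
 *
 *	ifq_attach(&ifp->if_snd, ifq_priq_ops, NULL);
 */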

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

	ifq_barrier(ifq); /* ensure nothing is running with the ifq */

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}
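
/*
 * The output side pairs ifq_enqueue() with ifq_start(): the packet is
 * queued under ifq_mtx and the start routine is then driven through
 * the serialiser (or bundled onto the softnet task while the queue is
 * short).  A sketch of a caller, roughly the shape of if_enqueue();
 * the function name is made up:
 *
 *	int
 *	exm_output_sketch(struct ifnet *ifp, struct mbuf *m)
 *	{
 *		int error;
 *
 *		error = ifq_enqueue(&ifp->if_snd, m);
 *		if (error)
 *			return (error);
 *
 *		ifq_start(&ifp->if_snd);
 *
 *		return (0);
 *	}
 *
 * Note that on failure the mbuf has already been freed by
 * ifq_enqueue(), so the caller must not touch it again.
 */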

static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}
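
/*
 * ifq_deq_begin(), ifq_deq_commit(), and ifq_deq_rollback() let an
 * MP-safe driver look at the head packet, try to place it on its
 * hardware ring, and only consume it once that has succeeded.  A
 * sketch of a qstart routine ("exm_softc" and "exm_encap" are
 * hypothetical driver names):
 *
 *	void
 *	exm_qstart(struct ifqueue *ifq)
 *	{
 *		struct exm_softc *sc = ifq->ifq_if->if_softc;
 *		struct mbuf *m;
 *
 *		for (;;) {
 *			m = ifq_deq_begin(ifq);
 *			if (m == NULL)
 *				break;
 *
 *			if (exm_encap(sc, m) != 0) {
 *				ifq_deq_rollback(ifq, m);
 *				ifq_set_oactive(ifq);
 *				break;
 *			}
 *
 *			ifq_deq_commit(ifq, m);
 *		}
 *	}
 *
 * Rolling back leaves the packet at the head of the queue; setting
 * oactive keeps ifq_start_task() from re-entering the driver until
 * ifq_restart_task() clears it again.
 */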

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}
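
/*
 * ifq_dequeue() is the simple form for drivers that can always consume
 * the packet once it has been taken off the queue.  A sketch, where
 * "exm_load_and_kick" is a hypothetical helper that copies the mbuf
 * into the transmit ring:
 *
 *	struct mbuf *m;
 *
 *	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL)
 *		exm_load_and_kick(sc, m);
 */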

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}
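
/*
 * ifq_q_enter() and ifq_q_leave() give discipline code access to its
 * private state under ifq_mtx, and fail if the queue has since been
 * attached to a different set of ops.  A sketch reading the lowest
 * priority list of priq ("len" is just a local here):
 *
 *	struct priq *pq;
 *	unsigned int len = 0;
 *
 *	pq = ifq_q_enter(ifq, ifq_priq_ops);
 *	if (pq != NULL) {
 *		len = ml_len(&pq->pq_lists[0]);
 *		ifq_q_leave(ifq, pq);
 *	}
 */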

void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * priq implementation
 */

unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}
559