/*	$OpenBSD: ifq.c,v 1.14 2017/11/14 04:08:11 dlg Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>

#include <net/if.h>
#include <net/if_var.h>

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

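/*
 * Glue the priority queue discipline into the generic ifqueue
 * machinery. The function pointers below fill the struct ifq_ops
 * slots in declaration order: ifqop_idx, ifqop_enq, ifqop_deq_begin,
 * ifqop_deq_commit, ifqop_purge, ifqop_alloc, and ifqop_free, which
 * is how the rest of this file calls into them.
 */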
const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

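/* One FIFO per priority level; a higher index is a higher priority. */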
struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);
void	ifq_bundle_task(void *);

#define TASK_ONQUEUE 0x1

static inline void
ifq_run_start(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_start);
}

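/*
 * ifq_serialize() makes sure work on an ifq (e.g. calls to a driver's
 * start routine) never runs concurrently. Tasks are put on a list
 * under ifq_task_mtx; the first CPU to find ifq_serializer idle
 * claims it and drains the list. Each task is copied to the stack
 * before the mutex is released, so the callback runs unlocked and the
 * task structure may be immediately reused or freed by its owner.
 */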
void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

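/*
 * Calling the driver for every packet is expensive, so starts are
 * bundled: with at least four packets queued (or ifq_maxlen, if
 * smaller) the start routine runs immediately and any pending bundle
 * task is cancelled; with fewer, the start is deferred to the bundle
 * task on the softnet taskq so it can pick up more packets first.
 */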
void
ifq_start(struct ifqueue *ifq)
{
	if (ifq_len(ifq) >= min(4, ifq->ifq_maxlen)) {
		task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
		ifq_run_start(ifq);
	} else
		task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
}

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_bundle_task(void *p)
{
	struct ifqueue *ifq = p;

	ifq_run_start(ifq);
}

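/*
 * ifq_barrier() returns only after any work the ifq had outstanding
 * has finished. If the bundle task could not be cancelled it may be
 * running, so wait for the softnet taskq (dropping the netlock around
 * taskq_barrier(), as softnet tasks may themselves need it). Then, if
 * the serializer is active, queue a task of our own and sleep until
 * it has run.
 */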
void
ifq_barrier(struct ifqueue *ifq)
{
	struct sleep_state sls;
	unsigned int notdone = 1;
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &notdone);

	/* this should only be called from converted drivers */
	KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE));

	if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle)) {
		int netlocked = (rw_status(&netlock) == RW_WRITE);

		if (netlocked) /* XXXSMP breaks atomicity */
			NET_UNLOCK();

		taskq_barrier(ifq->ifq_softnet);

		if (netlocked)
			NET_LOCK();
	}

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	while (notdone) {
		sleep_setup(&sls, &notdone, PWAIT, "ifqbar");
		sleep_finish(&sls, notdone);
	}
}

void
ifq_barrier_task(void *p)
{
	unsigned int *notdone = p;

	*notdone = 0;
	wakeup_one(notdone);
}

/*
 * ifqueue mbuf queue API
 */
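
/*
 * Committed dequeue is ifq_dequeue(); two-stage dequeue is
 * ifq_deq_begin(), then ifq_deq_commit() or ifq_deq_rollback(). The
 * ifq mutex is held from ifq_deq_begin() until the matching commit or
 * rollback, so a driver can look at the next packet, try to fit it
 * into its tx ring, and put it back untouched if the ring is full.
 * A sketch of how a hypothetical MP-safe driver's start routine might
 * use this (hxp_encap() and struct hxp_softc are made up for
 * illustration; when encapsulation fails the packet is rolled back
 * and the ifq is marked oactive until the ring drains):
 *
 *	void
 *	hxp_start(struct ifqueue *ifq)
 *	{
 *		struct hxp_softc *sc = ifq->ifq_if->if_softc;
 *		struct mbuf *m;
 *
 *		for (;;) {
 *			m = ifq_deq_begin(ifq);
 *			if (m == NULL)
 *				break;
 *
 *			if (hxp_encap(sc, m) != 0) {
 *				ifq_deq_rollback(ifq, m);
 *				ifq_set_oactive(ifq);
 *				break;
 *			}
 *
 *			ifq_deq_commit(ifq, m);
 *		}
 *	}
 */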

void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softnet = net_tq(ifp->if_index);
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;
	task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_set_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;
}

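/*
 * ifq_attach() swaps in a new queueing discipline at runtime (for
 * example when a different scheduler is configured). The new queue is
 * allocated up front, the old one is purged under the ifq mutex, the
 * ops and queue pointers are switched, and the purged packets are
 * offered to the new discipline; anything it refuses is counted as a
 * drop and freed after the mutex is released.
 */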
void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

	ifq_barrier(ifq); /* ensure nothing is running with the ifq */

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

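/*
 * The enq op returns NULL if the packet was accepted, the packet
 * itself if it was rejected, or a different mbuf if room was made by
 * dropping something else. ifq_enqueue() turns this into stats and an
 * error: only an outright rejection yields ENOBUFS to the caller.
 */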
int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}

static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

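/*
 * mbufs are never freed while the ifq mutex is held. Instead they are
 * staged on the ifq_free list and released here, once the mutex has
 * been dropped.
 */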
static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

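/*
 * ifq_q_enter()/ifq_q_leave() give a queueing discipline locked
 * access to its own queue state from outside the normal enqueue and
 * dequeue paths. The ops pointer is checked first, so a caller that
 * raced an ifq_attach() of a different discipline backs off with NULL
 * rather than poking at a queue it does not understand.
 */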
void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

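/*
 * ifq_mfreem() and ifq_mfreeml() let code that already holds the ifq
 * mutex (a discipline dropping packets, for instance) discard mbufs:
 * they are staged on ifq_free and actually freed later by
 * ifq_deq_leave().
 */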
void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * priq implementation
 */

unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID))
		flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

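/*
 * On enqueue to a full ifq, priq steals space from the lowest
 * non-empty list that is strictly below the new packet's priority.
 * The victim (or the new packet itself, if nothing lower exists) is
 * returned to the caller to be counted and freed.
 */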
struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

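/*
 * Dequeue scans from the highest priority list down and hands back
 * the head of the first non-empty one; the cookie remembers which
 * list so commit can pop the same packet without searching again.
 */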
struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}