/*-
 * Copyright (c) 2017-2019 Hans Petter Selasky
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/compat/linuxkpi/common/src/linux_work.c 352479 2019-09-18 07:29:33Z hselasky $");

#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/compat.h>
#include <linux/spinlock.h>

#include <sys/kernel.h>

/*
 * Define all work struct states
 */
enum {
	WORK_ST_IDLE,			/* idle - not started */
	WORK_ST_TIMER,			/* timer is being started */
	WORK_ST_TASK,			/* taskqueue is being queued */
	WORK_ST_EXEC,			/* callback is being called */
	WORK_ST_CANCEL,			/* cancel is being requested */
	WORK_ST_MAX,
};
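
/*
 * Illustrative state flow, derived from the transition tables below
 * (typical, uncontended case):
 *
 *	queue_work():		IDLE -> TASK
 *	queue_delayed_work():	IDLE -> TIMER -> TASK (via timer callback)
 *	linux_work_fn():	TASK -> EXEC -> IDLE
 *	cancel requests:	TIMER/TASK -> CANCEL -> IDLE (on success)
 */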

/*
 * Define global workqueues
 */
static struct workqueue_struct *linux_system_short_wq;
static struct workqueue_struct *linux_system_long_wq;

struct workqueue_struct *system_wq;
struct workqueue_struct *system_long_wq;
struct workqueue_struct *system_unbound_wq;
struct workqueue_struct *system_highpri_wq;
struct workqueue_struct *system_power_efficient_wq;

static int linux_default_wq_cpus = 4;

static void linux_delayed_work_timer_fn(void *);

/*
 * This function atomically updates the work state and returns the
 * previous state at the time of update.
 */
static uint8_t
linux_update_state(atomic_t *v, const uint8_t *pstate)
{
	int c, old;

	c = v->counter;

	while ((old = atomic_cmpxchg(v, c, pstate[c])) != c)
		c = old;

	return (c);
}

/*
 * A LinuxKPI task is allowed to free itself inside the callback function
 * and cannot safely be referred to after the callback function has
 * completed. This function gives the linux_work_fn() function a hint
 * that the task is not going away and can have its state checked
 * again. Without this extra hint LinuxKPI tasks cannot be serialized
 * across multiple worker threads.
 */
static bool
linux_work_exec_unblock(struct work_struct *work)
{
	struct workqueue_struct *wq;
	struct work_exec *exec;
	bool retval = false;

	wq = work->work_queue;
	if (unlikely(wq == NULL))
		goto done;

	WQ_EXEC_LOCK(wq);
	TAILQ_FOREACH(exec, &wq->exec_head, entry) {
		if (exec->target == work) {
			exec->target = NULL;
			retval = true;
			break;
		}
	}
	WQ_EXEC_UNLOCK(wq);
done:
	return (retval);
}

static void
linux_delayed_work_enqueue(struct delayed_work *dwork)
{
	struct taskqueue *tq;

	tq = dwork->work.work_queue->taskqueue;
	taskqueue_enqueue(tq, &dwork->work.work_task);
}

/*
 * This function queues the given work structure on the given
 * workqueue. It returns true if the work was successfully
 * [re-]queued. Else the work is already pending for completion.
 */
bool
linux_queue_work_on(int cpu __unused, struct workqueue_struct *wq,
    struct work_struct *work)
{
	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
		[WORK_ST_IDLE] = WORK_ST_TASK,		/* start queuing task */
		[WORK_ST_TIMER] = WORK_ST_TIMER,	/* NOP */
		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
		[WORK_ST_EXEC] = WORK_ST_TASK,		/* queue task another time */
		[WORK_ST_CANCEL] = WORK_ST_TASK,	/* start queuing task again */
	};

	if (atomic_read(&wq->draining) != 0)
		return (!work_pending(work));

	switch (linux_update_state(&work->state, states)) {
	case WORK_ST_EXEC:
	case WORK_ST_CANCEL:
		if (linux_work_exec_unblock(work) != 0)
			return (true);
		/* FALLTHROUGH */
	case WORK_ST_IDLE:
		work->work_queue = wq;
		taskqueue_enqueue(wq->taskqueue, &work->work_task);
		return (true);
	default:
		return (false);		/* already on a queue */
	}
}
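
/*
 * Illustrative consumer usage (a minimal sketch; my_task and
 * my_handler are hypothetical names). The usual LinuxKPI entry point
 * is the queue_work() macro, which ends up in linux_queue_work_on():
 *
 *	static struct work_struct my_task;
 *
 *	static void
 *	my_handler(struct work_struct *work)
 *	{
 *		// runs in a worker thread of the selected workqueue
 *	}
 *
 *	INIT_WORK(&my_task, my_handler);
 *	queue_work(system_wq, &my_task);
 */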

/*
 * This function queues the given work structure on the given
 * workqueue after a given delay in ticks. It returns true if the
 * work was successfully [re-]queued. Else the work is already pending
 * for completion.
 */
bool
linux_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
    struct delayed_work *dwork, unsigned delay)
{
	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
		[WORK_ST_IDLE] = WORK_ST_TIMER,		/* start timeout */
		[WORK_ST_TIMER] = WORK_ST_TIMER,	/* NOP */
		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
		[WORK_ST_EXEC] = WORK_ST_TIMER,		/* start timeout */
		[WORK_ST_CANCEL] = WORK_ST_TIMER,	/* start timeout */
	};

	if (atomic_read(&wq->draining) != 0)
		return (!work_pending(&dwork->work));

	switch (linux_update_state(&dwork->work.state, states)) {
	case WORK_ST_EXEC:
	case WORK_ST_CANCEL:
		if (delay == 0 && linux_work_exec_unblock(&dwork->work) != 0) {
			dwork->timer.expires = jiffies;
			return (true);
		}
		/* FALLTHROUGH */
	case WORK_ST_IDLE:
		dwork->work.work_queue = wq;
		dwork->timer.expires = jiffies + delay;

		if (delay == 0) {
			linux_delayed_work_enqueue(dwork);
		} else if (unlikely(cpu != WORK_CPU_UNBOUND)) {
			mtx_lock(&dwork->timer.mtx);
			callout_reset_on(&dwork->timer.callout, delay,
			    &linux_delayed_work_timer_fn, dwork, cpu);
			mtx_unlock(&dwork->timer.mtx);
		} else {
			mtx_lock(&dwork->timer.mtx);
			callout_reset(&dwork->timer.callout, delay,
			    &linux_delayed_work_timer_fn, dwork);
			mtx_unlock(&dwork->timer.mtx);
		}
		return (true);
	default:
		return (false);		/* already on a queue */
	}
}
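
/*
 * Illustrative consumer usage (a minimal sketch; my_dwork and
 * my_delayed_handler are hypothetical names). The delay is given in
 * ticks, so msecs_to_jiffies() is typically used for the conversion:
 *
 *	static struct delayed_work my_dwork;
 *
 *	INIT_DELAYED_WORK(&my_dwork, my_delayed_handler);
 *	queue_delayed_work(system_long_wq, &my_dwork,
 *	    msecs_to_jiffies(100));
 */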

void
linux_work_fn(void *context, int pending)
{
	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
		[WORK_ST_TIMER] = WORK_ST_EXEC,		/* delayed work w/o timeout */
		[WORK_ST_TASK] = WORK_ST_EXEC,		/* call callback */
		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* complete callback */
		[WORK_ST_CANCEL] = WORK_ST_EXEC,	/* failed to cancel */
	};
	struct work_struct *work;
	struct workqueue_struct *wq;
	struct work_exec exec;
	struct task_struct *task;

	task = current;

	/* setup local variables */
	work = context;
	wq = work->work_queue;

	/* store target pointer */
	exec.target = work;

	/* insert executor into list */
	WQ_EXEC_LOCK(wq);
	TAILQ_INSERT_TAIL(&wq->exec_head, &exec, entry);
	while (1) {
		switch (linux_update_state(&work->state, states)) {
		case WORK_ST_TIMER:
		case WORK_ST_TASK:
		case WORK_ST_CANCEL:
			WQ_EXEC_UNLOCK(wq);

			/* set current work structure */
			task->work = work;

			/* call work function */
			work->func(work);

			/* clear current work structure */
			task->work = NULL;

			WQ_EXEC_LOCK(wq);
			/* check if unblocked */
			if (exec.target != work) {
				/* reapply block */
				exec.target = work;
				break;
			}
			/* FALLTHROUGH */
		default:
			goto done;
		}
	}
done:
	/* remove executor from list */
	TAILQ_REMOVE(&wq->exec_head, &exec, entry);
	WQ_EXEC_UNLOCK(wq);
}
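
/*
 * Illustrative interaction with linux_work_exec_unblock() (a sketch;
 * my_handler and more_to_do are hypothetical names). A work function
 * may requeue itself; the executor above detects this through the
 * cleared exec.target, reapplies the block and runs the item again in
 * the same worker thread instead of racing with another one:
 *
 *	static void
 *	my_handler(struct work_struct *work)
 *	{
 *		if (more_to_do())
 *			queue_work(system_wq, work);
 *	}
 */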

void
linux_delayed_work_fn(void *context, int pending)
{
	struct delayed_work *dwork = context;

	/*
	 * Make sure the timer belonging to the delayed work gets
	 * drained before invoking the work function. Else the timer
	 * mutex may still be in use which can lead to use-after-free
	 * situations, because the work function might free the work
	 * structure before returning.
	 */
	callout_drain(&dwork->timer.callout);

	linux_work_fn(&dwork->work, pending);
}

static void
linux_delayed_work_timer_fn(void *arg)
{
	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
		[WORK_ST_TIMER] = WORK_ST_TASK,		/* start queueing task */
		[WORK_ST_TASK] = WORK_ST_TASK,		/* NOP */
		[WORK_ST_EXEC] = WORK_ST_EXEC,		/* NOP */
		[WORK_ST_CANCEL] = WORK_ST_TASK,	/* failed to cancel */
	};
	struct delayed_work *dwork = arg;

	switch (linux_update_state(&dwork->work.state, states)) {
	case WORK_ST_TIMER:
	case WORK_ST_CANCEL:
		linux_delayed_work_enqueue(dwork);
		break;
	default:
		break;
	}
}

/*
 * This function cancels the given work structure in a synchronous
 * fashion. It returns true if the work was successfully
 * cancelled. Else the work was already cancelled.
 */
bool
linux_cancel_work_sync(struct work_struct *work)
{
	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
		[WORK_ST_TIMER] = WORK_ST_TIMER,	/* can't happen */
		[WORK_ST_TASK] = WORK_ST_IDLE,		/* cancel and drain */
		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* too late, drain */
		[WORK_ST_CANCEL] = WORK_ST_IDLE,	/* cancel and drain */
	};
	struct taskqueue *tq;
	bool retval = false;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "linux_cancel_work_sync() might sleep");
retry:
	switch (linux_update_state(&work->state, states)) {
	case WORK_ST_IDLE:
	case WORK_ST_TIMER:
		return (retval);
	case WORK_ST_EXEC:
		tq = work->work_queue->taskqueue;
		if (taskqueue_cancel(tq, &work->work_task, NULL) != 0)
			taskqueue_drain(tq, &work->work_task);
		goto retry;	/* work may have restarted itself */
	default:
		tq = work->work_queue->taskqueue;
		if (taskqueue_cancel(tq, &work->work_task, NULL) != 0)
			taskqueue_drain(tq, &work->work_task);
		retval = true;
		goto retry;
	}
}
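
/*
 * Illustrative consumer usage (a minimal sketch; sc and my_task are
 * hypothetical names). A typical pattern is to cancel outstanding work
 * synchronously before freeing the structure that embeds it, for
 * example in a detach path:
 *
 *	cancel_work_sync(&sc->my_task);
 *	kfree(sc);
 */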

/*
 * This function atomically stops the timer and callback. The timer
 * callback will not be called after this function returns. This
 * function returns true when the timeout was cancelled. Else the
 * timeout was not started or has already been called.
 */
static inline bool
linux_cancel_timer(struct delayed_work *dwork, bool drain)
{
	bool cancelled;

	mtx_lock(&dwork->timer.mtx);
	cancelled = (callout_stop(&dwork->timer.callout) == 1);
	mtx_unlock(&dwork->timer.mtx);

	/* check if we should drain */
	if (drain)
		callout_drain(&dwork->timer.callout);
	return (cancelled);
}

/*
 * This function cancels the given delayed work structure in a
 * non-blocking fashion. It returns true if the work was
 * successfully cancelled. Else the work may still be busy or already
 * cancelled.
 */
bool
linux_cancel_delayed_work(struct delayed_work *dwork)
{
	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
		[WORK_ST_TIMER] = WORK_ST_CANCEL,	/* try to cancel */
		[WORK_ST_TASK] = WORK_ST_CANCEL,	/* try to cancel */
		[WORK_ST_EXEC] = WORK_ST_EXEC,		/* NOP */
		[WORK_ST_CANCEL] = WORK_ST_CANCEL,	/* NOP */
	};
	struct taskqueue *tq;

	switch (linux_update_state(&dwork->work.state, states)) {
	case WORK_ST_TIMER:
	case WORK_ST_CANCEL:
		if (linux_cancel_timer(dwork, 0)) {
			atomic_cmpxchg(&dwork->work.state,
			    WORK_ST_CANCEL, WORK_ST_IDLE);
			return (true);
		}
		/* FALLTHROUGH */
	case WORK_ST_TASK:
		tq = dwork->work.work_queue->taskqueue;
		if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) == 0) {
			atomic_cmpxchg(&dwork->work.state,
			    WORK_ST_CANCEL, WORK_ST_IDLE);
			return (true);
		}
		/* FALLTHROUGH */
	default:
		return (false);
	}
}

/*
 * This function cancels the given delayed work structure in a
 * synchronous fashion. It returns true if the work was successfully
 * cancelled. Else the work was already cancelled.
 */
bool
linux_cancel_delayed_work_sync(struct delayed_work *dwork)
{
	static const uint8_t states[WORK_ST_MAX] __aligned(8) = {
		[WORK_ST_IDLE] = WORK_ST_IDLE,		/* NOP */
		[WORK_ST_TIMER] = WORK_ST_IDLE,		/* cancel and drain */
		[WORK_ST_TASK] = WORK_ST_IDLE,		/* cancel and drain */
		[WORK_ST_EXEC] = WORK_ST_IDLE,		/* too late, drain */
		[WORK_ST_CANCEL] = WORK_ST_IDLE,	/* cancel and drain */
	};
	struct taskqueue *tq;
	bool retval = false;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "linux_cancel_delayed_work_sync() might sleep");
retry:
	switch (linux_update_state(&dwork->work.state, states)) {
	case WORK_ST_IDLE:
		return (retval);
	case WORK_ST_EXEC:
		tq = dwork->work.work_queue->taskqueue;
		if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) != 0)
			taskqueue_drain(tq, &dwork->work.work_task);
		goto retry;	/* work may have restarted itself */
	case WORK_ST_TIMER:
	case WORK_ST_CANCEL:
		if (linux_cancel_timer(dwork, 1)) {
			/*
			 * Make sure taskqueue is also drained before
			 * returning:
			 */
			tq = dwork->work.work_queue->taskqueue;
			taskqueue_drain(tq, &dwork->work.work_task);
			retval = true;
			goto retry;
		}
		/* FALLTHROUGH */
	default:
		tq = dwork->work.work_queue->taskqueue;
		if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) != 0)
			taskqueue_drain(tq, &dwork->work.work_task);
		retval = true;
		goto retry;
	}
}
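
/*
 * Illustrative consumer usage (a minimal sketch; sc and my_dwork are
 * hypothetical names). When this call returns, neither the timer nor
 * the taskqueue can still reference the work item, so it is safe to
 * free the structure embedding it:
 *
 *	cancel_delayed_work_sync(&sc->my_dwork);
 *	kfree(sc);
 */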

/*
 * This function waits until the given work structure is completed.
 * It returns true if the work was successfully waited for. Else the
 * work was not waited for.
 */
bool
linux_flush_work(struct work_struct *work)
{
	struct taskqueue *tq;
	bool retval;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "linux_flush_work() might sleep");

	switch (atomic_read(&work->state)) {
	case WORK_ST_IDLE:
		return (false);
	default:
		tq = work->work_queue->taskqueue;
		retval = taskqueue_poll_is_busy(tq, &work->work_task);
		taskqueue_drain(tq, &work->work_task);
		return (retval);
	}
}
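
/*
 * Illustrative consumer usage (a minimal sketch; my_task is a
 * hypothetical name). flush_work() waits for a queued or running work
 * item to complete without cancelling it:
 *
 *	queue_work(system_wq, &my_task);
 *	flush_work(&my_task);
 */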

/*
 * This function waits until the given delayed work structure is
 * completed. It returns true if the work was successfully waited
 * for. Else the work was not waited for.
 */
bool
linux_flush_delayed_work(struct delayed_work *dwork)
{
	struct taskqueue *tq;
	bool retval;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "linux_flush_delayed_work() might sleep");

	switch (atomic_read(&dwork->work.state)) {
	case WORK_ST_IDLE:
		return (false);
	case WORK_ST_TIMER:
		if (linux_cancel_timer(dwork, 1))
			linux_delayed_work_enqueue(dwork);
		/* FALLTHROUGH */
	default:
		tq = dwork->work.work_queue->taskqueue;
		retval = taskqueue_poll_is_busy(tq, &dwork->work.work_task);
		taskqueue_drain(tq, &dwork->work.work_task);
		return (retval);
	}
}

/*
 * This function returns true if the given work is pending, and not
 * yet executing:
 */
bool
linux_work_pending(struct work_struct *work)
{
	switch (atomic_read(&work->state)) {
	case WORK_ST_TIMER:
	case WORK_ST_TASK:
	case WORK_ST_CANCEL:
		return (true);
	default:
		return (false);
	}
}

/*
 * This function returns true if the given work is busy.
 */
bool
linux_work_busy(struct work_struct *work)
{
	struct taskqueue *tq;

	switch (atomic_read(&work->state)) {
	case WORK_ST_IDLE:
		return (false);
	case WORK_ST_EXEC:
		tq = work->work_queue->taskqueue;
		return (taskqueue_poll_is_busy(tq, &work->work_task));
	default:
		return (true);
	}
}

struct workqueue_struct *
linux_create_workqueue_common(const char *name, int cpus)
{
	struct workqueue_struct *wq;

	/*
	 * If zero CPUs are specified use the default number of CPUs:
	 */
	if (cpus == 0)
		cpus = linux_default_wq_cpus;

	wq = kmalloc(sizeof(*wq), M_WAITOK | M_ZERO);
	wq->taskqueue = taskqueue_create(name, M_WAITOK,
	    taskqueue_thread_enqueue, &wq->taskqueue);
	atomic_set(&wq->draining, 0);
	taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name);
	TAILQ_INIT(&wq->exec_head);
	mtx_init(&wq->exec_mtx, "linux_wq_exec", NULL, MTX_DEF);

	return (wq);
}

void
linux_destroy_workqueue(struct workqueue_struct *wq)
{
	atomic_inc(&wq->draining);
	drain_workqueue(wq);
	taskqueue_free(wq->taskqueue);
	mtx_destroy(&wq->exec_mtx);
	kfree(wq);
}
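
/*
 * Illustrative consumer usage (a minimal sketch; my_wq, my_task and
 * the queue name are hypothetical). A private workqueue is typically
 * created at attach time and torn down at detach time:
 *
 *	struct workqueue_struct *my_wq;
 *
 *	my_wq = alloc_workqueue("my_driver_wq", 0, 1);
 *	queue_work(my_wq, &my_task);
 *	...
 *	destroy_workqueue(my_wq);
 */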

void
linux_init_delayed_work(struct delayed_work *dwork, work_func_t func)
{
	memset(dwork, 0, sizeof(*dwork));
	dwork->work.func = func;
	TASK_INIT(&dwork->work.work_task, 0, linux_delayed_work_fn, dwork);
	mtx_init(&dwork->timer.mtx, spin_lock_name("lkpi-dwork"), NULL,
	    MTX_DEF | MTX_NOWITNESS);
	callout_init_mtx(&dwork->timer.callout, &dwork->timer.mtx, 0);
}

struct work_struct *
linux_current_work(void)
{
	return (current->work);
}

static void
linux_work_init(void *arg)
{
	int max_wq_cpus = mp_ncpus + 1;

	/* avoid deadlock when there are too few threads */
	if (max_wq_cpus < 4)
		max_wq_cpus = 4;

	/* set default number of CPUs */
	linux_default_wq_cpus = max_wq_cpus;

	linux_system_short_wq = alloc_workqueue("linuxkpi_short_wq", 0, max_wq_cpus);
	linux_system_long_wq = alloc_workqueue("linuxkpi_long_wq", 0, max_wq_cpus);

	/* populate the workqueue pointers */
	system_long_wq = linux_system_long_wq;
	system_wq = linux_system_short_wq;
	system_power_efficient_wq = linux_system_short_wq;
	system_unbound_wq = linux_system_short_wq;
	system_highpri_wq = linux_system_short_wq;
}
SYSINIT(linux_work_init, SI_SUB_TASKQ, SI_ORDER_THIRD, linux_work_init, NULL);

static void
linux_work_uninit(void *arg)
{
	destroy_workqueue(linux_system_short_wq);
	destroy_workqueue(linux_system_long_wq);

	/* clear workqueue pointers */
	system_long_wq = NULL;
	system_wq = NULL;
	system_power_efficient_wq = NULL;
	system_unbound_wq = NULL;
	system_highpri_wq = NULL;
}
SYSUNINIT(linux_work_uninit, SI_SUB_TASKQ, SI_ORDER_THIRD, linux_work_uninit, NULL);