1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2012 Red Hat, Inc.
4 *
5 * This file is released under the GPL.
6 */
7
8#include "dm.h"
9#include "dm-bio-prison-v1.h"
10#include "dm-bio-prison-v2.h"
11
12#include <linux/spinlock.h>
13#include <linux/mempool.h>
14#include <linux/module.h>
15#include <linux/slab.h>
16
17/*----------------------------------------------------------------*/
18
19#define MIN_CELLS 1024
20
21struct prison_region {
22	spinlock_t lock;
23	struct rb_root cell;
24} ____cacheline_aligned_in_smp;
25
26struct dm_bio_prison {
27	mempool_t cell_pool;
28	unsigned int num_locks;
29	struct prison_region regions[] __counted_by(num_locks);
30};
31
32static struct kmem_cache *_cell_cache;
33
34/*----------------------------------------------------------------*/
35
36/*
37 * @nr_cells should be the number of cells you want in use _concurrently_.
38 * Don't confuse it with the number of distinct keys.
39 */
40struct dm_bio_prison *dm_bio_prison_create(void)
41{
42	int ret;
43	unsigned int i, num_locks;
44	struct dm_bio_prison *prison;
45
46	num_locks = dm_num_hash_locks();
47	prison = kzalloc(struct_size(prison, regions, num_locks), GFP_KERNEL);
48	if (!prison)
49		return NULL;
50	prison->num_locks = num_locks;
51
52	for (i = 0; i < prison->num_locks; i++) {
53		spin_lock_init(&prison->regions[i].lock);
54		prison->regions[i].cell = RB_ROOT;
55	}
56
57	ret = mempool_init_slab_pool(&prison->cell_pool, MIN_CELLS, _cell_cache);
58	if (ret) {
59		kfree(prison);
60		return NULL;
61	}
62
63	return prison;
64}
65EXPORT_SYMBOL_GPL(dm_bio_prison_create);
66
67void dm_bio_prison_destroy(struct dm_bio_prison *prison)
68{
69	mempool_exit(&prison->cell_pool);
70	kfree(prison);
71}
72EXPORT_SYMBOL_GPL(dm_bio_prison_destroy);
73
74struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, gfp_t gfp)
75{
76	return mempool_alloc(&prison->cell_pool, gfp);
77}
78EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell);
79
80void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
81			     struct dm_bio_prison_cell *cell)
82{
83	mempool_free(cell, &prison->cell_pool);
84}
85EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell);
86
87static void __setup_new_cell(struct dm_cell_key *key,
88			     struct bio *holder,
89			     struct dm_bio_prison_cell *cell)
90{
91	memcpy(&cell->key, key, sizeof(cell->key));
92	cell->holder = holder;
93	bio_list_init(&cell->bios);
94}
95
96static int cmp_keys(struct dm_cell_key *lhs,
97		    struct dm_cell_key *rhs)
98{
99	if (lhs->virtual < rhs->virtual)
100		return -1;
101
102	if (lhs->virtual > rhs->virtual)
103		return 1;
104
105	if (lhs->dev < rhs->dev)
106		return -1;
107
108	if (lhs->dev > rhs->dev)
109		return 1;
110
111	if (lhs->block_end <= rhs->block_begin)
112		return -1;
113
114	if (lhs->block_begin >= rhs->block_end)
115		return 1;
116
117	return 0;
118}
119
120static inline unsigned int lock_nr(struct dm_cell_key *key, unsigned int num_locks)
121{
122	return dm_hash_locks_index((key->block_begin >> BIO_PRISON_MAX_RANGE_SHIFT),
123				   num_locks);
124}
125
126bool dm_cell_key_has_valid_range(struct dm_cell_key *key)
127{
128	if (WARN_ON_ONCE(key->block_end - key->block_begin > BIO_PRISON_MAX_RANGE))
129		return false;
130	if (WARN_ON_ONCE((key->block_begin >> BIO_PRISON_MAX_RANGE_SHIFT) !=
131			 (key->block_end - 1) >> BIO_PRISON_MAX_RANGE_SHIFT))
132		return false;
133
134	return true;
135}
136EXPORT_SYMBOL(dm_cell_key_has_valid_range);
137
138static int __bio_detain(struct rb_root *root,
139			struct dm_cell_key *key,
140			struct bio *inmate,
141			struct dm_bio_prison_cell *cell_prealloc,
142			struct dm_bio_prison_cell **cell_result)
143{
144	int r;
145	struct rb_node **new = &root->rb_node, *parent = NULL;
146
147	while (*new) {
148		struct dm_bio_prison_cell *cell =
149			rb_entry(*new, struct dm_bio_prison_cell, node);
150
151		r = cmp_keys(key, &cell->key);
152
153		parent = *new;
154		if (r < 0)
155			new = &((*new)->rb_left);
156		else if (r > 0)
157			new = &((*new)->rb_right);
158		else {
159			if (inmate)
160				bio_list_add(&cell->bios, inmate);
161			*cell_result = cell;
162			return 1;
163		}
164	}
165
166	__setup_new_cell(key, inmate, cell_prealloc);
167	*cell_result = cell_prealloc;
168
169	rb_link_node(&cell_prealloc->node, parent, new);
170	rb_insert_color(&cell_prealloc->node, root);
171
172	return 0;
173}
174
175static int bio_detain(struct dm_bio_prison *prison,
176		      struct dm_cell_key *key,
177		      struct bio *inmate,
178		      struct dm_bio_prison_cell *cell_prealloc,
179		      struct dm_bio_prison_cell **cell_result)
180{
181	int r;
182	unsigned l = lock_nr(key, prison->num_locks);
183
184	spin_lock_irq(&prison->regions[l].lock);
185	r = __bio_detain(&prison->regions[l].cell, key, inmate, cell_prealloc, cell_result);
186	spin_unlock_irq(&prison->regions[l].lock);
187
188	return r;
189}
190
191int dm_bio_detain(struct dm_bio_prison *prison,
192		  struct dm_cell_key *key,
193		  struct bio *inmate,
194		  struct dm_bio_prison_cell *cell_prealloc,
195		  struct dm_bio_prison_cell **cell_result)
196{
197	return bio_detain(prison, key, inmate, cell_prealloc, cell_result);
198}
199EXPORT_SYMBOL_GPL(dm_bio_detain);
200
201int dm_get_cell(struct dm_bio_prison *prison,
202		struct dm_cell_key *key,
203		struct dm_bio_prison_cell *cell_prealloc,
204		struct dm_bio_prison_cell **cell_result)
205{
206	return bio_detain(prison, key, NULL, cell_prealloc, cell_result);
207}
208EXPORT_SYMBOL_GPL(dm_get_cell);
209
210/*
211 * @inmates must have been initialised prior to this call
212 */
213static void __cell_release(struct rb_root *root,
214			   struct dm_bio_prison_cell *cell,
215			   struct bio_list *inmates)
216{
217	rb_erase(&cell->node, root);
218
219	if (inmates) {
220		if (cell->holder)
221			bio_list_add(inmates, cell->holder);
222		bio_list_merge(inmates, &cell->bios);
223	}
224}
225
226void dm_cell_release(struct dm_bio_prison *prison,
227		     struct dm_bio_prison_cell *cell,
228		     struct bio_list *bios)
229{
230	unsigned l = lock_nr(&cell->key, prison->num_locks);
231
232	spin_lock_irq(&prison->regions[l].lock);
233	__cell_release(&prison->regions[l].cell, cell, bios);
234	spin_unlock_irq(&prison->regions[l].lock);
235}
236EXPORT_SYMBOL_GPL(dm_cell_release);
237
238/*
239 * Sometimes we don't want the holder, just the additional bios.
240 */
241static void __cell_release_no_holder(struct rb_root *root,
242				     struct dm_bio_prison_cell *cell,
243				     struct bio_list *inmates)
244{
245	rb_erase(&cell->node, root);
246	bio_list_merge(inmates, &cell->bios);
247}
248
249void dm_cell_release_no_holder(struct dm_bio_prison *prison,
250			       struct dm_bio_prison_cell *cell,
251			       struct bio_list *inmates)
252{
253	unsigned l = lock_nr(&cell->key, prison->num_locks);
254	unsigned long flags;
255
256	spin_lock_irqsave(&prison->regions[l].lock, flags);
257	__cell_release_no_holder(&prison->regions[l].cell, cell, inmates);
258	spin_unlock_irqrestore(&prison->regions[l].lock, flags);
259}
260EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
261
262void dm_cell_error(struct dm_bio_prison *prison,
263		   struct dm_bio_prison_cell *cell, blk_status_t error)
264{
265	struct bio_list bios;
266	struct bio *bio;
267
268	bio_list_init(&bios);
269	dm_cell_release(prison, cell, &bios);
270
271	while ((bio = bio_list_pop(&bios))) {
272		bio->bi_status = error;
273		bio_endio(bio);
274	}
275}
276EXPORT_SYMBOL_GPL(dm_cell_error);
277
278void dm_cell_visit_release(struct dm_bio_prison *prison,
279			   void (*visit_fn)(void *, struct dm_bio_prison_cell *),
280			   void *context,
281			   struct dm_bio_prison_cell *cell)
282{
283	unsigned l = lock_nr(&cell->key, prison->num_locks);
284	spin_lock_irq(&prison->regions[l].lock);
285	visit_fn(context, cell);
286	rb_erase(&cell->node, &prison->regions[l].cell);
287	spin_unlock_irq(&prison->regions[l].lock);
288}
289EXPORT_SYMBOL_GPL(dm_cell_visit_release);
290
291static int __promote_or_release(struct rb_root *root,
292				struct dm_bio_prison_cell *cell)
293{
294	if (bio_list_empty(&cell->bios)) {
295		rb_erase(&cell->node, root);
296		return 1;
297	}
298
299	cell->holder = bio_list_pop(&cell->bios);
300	return 0;
301}
302
303int dm_cell_promote_or_release(struct dm_bio_prison *prison,
304			       struct dm_bio_prison_cell *cell)
305{
306	int r;
307	unsigned l = lock_nr(&cell->key, prison->num_locks);
308
309	spin_lock_irq(&prison->regions[l].lock);
310	r = __promote_or_release(&prison->regions[l].cell, cell);
311	spin_unlock_irq(&prison->regions[l].lock);
312
313	return r;
314}
315EXPORT_SYMBOL_GPL(dm_cell_promote_or_release);
316
317/*----------------------------------------------------------------*/
318
319#define DEFERRED_SET_SIZE 64
320
321struct dm_deferred_entry {
322	struct dm_deferred_set *ds;
323	unsigned int count;
324	struct list_head work_items;
325};
326
327struct dm_deferred_set {
328	spinlock_t lock;
329	unsigned int current_entry;
330	unsigned int sweeper;
331	struct dm_deferred_entry entries[DEFERRED_SET_SIZE];
332};
333
334struct dm_deferred_set *dm_deferred_set_create(void)
335{
336	int i;
337	struct dm_deferred_set *ds;
338
339	ds = kmalloc(sizeof(*ds), GFP_KERNEL);
340	if (!ds)
341		return NULL;
342
343	spin_lock_init(&ds->lock);
344	ds->current_entry = 0;
345	ds->sweeper = 0;
346	for (i = 0; i < DEFERRED_SET_SIZE; i++) {
347		ds->entries[i].ds = ds;
348		ds->entries[i].count = 0;
349		INIT_LIST_HEAD(&ds->entries[i].work_items);
350	}
351
352	return ds;
353}
354EXPORT_SYMBOL_GPL(dm_deferred_set_create);
355
356void dm_deferred_set_destroy(struct dm_deferred_set *ds)
357{
358	kfree(ds);
359}
360EXPORT_SYMBOL_GPL(dm_deferred_set_destroy);
361
362struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds)
363{
364	unsigned long flags;
365	struct dm_deferred_entry *entry;
366
367	spin_lock_irqsave(&ds->lock, flags);
368	entry = ds->entries + ds->current_entry;
369	entry->count++;
370	spin_unlock_irqrestore(&ds->lock, flags);
371
372	return entry;
373}
374EXPORT_SYMBOL_GPL(dm_deferred_entry_inc);
375
376static unsigned int ds_next(unsigned int index)
377{
378	return (index + 1) % DEFERRED_SET_SIZE;
379}
380
381static void __sweep(struct dm_deferred_set *ds, struct list_head *head)
382{
383	while ((ds->sweeper != ds->current_entry) &&
384	       !ds->entries[ds->sweeper].count) {
385		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
386		ds->sweeper = ds_next(ds->sweeper);
387	}
388
389	if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count)
390		list_splice_init(&ds->entries[ds->sweeper].work_items, head);
391}
392
393void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head)
394{
395	unsigned long flags;
396
397	spin_lock_irqsave(&entry->ds->lock, flags);
398	BUG_ON(!entry->count);
399	--entry->count;
400	__sweep(entry->ds, head);
401	spin_unlock_irqrestore(&entry->ds->lock, flags);
402}
403EXPORT_SYMBOL_GPL(dm_deferred_entry_dec);
404
405/*
406 * Returns 1 if deferred or 0 if no pending items to delay job.
407 */
408int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work)
409{
410	int r = 1;
411	unsigned int next_entry;
412
413	spin_lock_irq(&ds->lock);
414	if ((ds->sweeper == ds->current_entry) &&
415	    !ds->entries[ds->current_entry].count)
416		r = 0;
417	else {
418		list_add(work, &ds->entries[ds->current_entry].work_items);
419		next_entry = ds_next(ds->current_entry);
420		if (!ds->entries[next_entry].count)
421			ds->current_entry = next_entry;
422	}
423	spin_unlock_irq(&ds->lock);
424
425	return r;
426}
427EXPORT_SYMBOL_GPL(dm_deferred_set_add_work);
428
429/*----------------------------------------------------------------*/
430
431static int __init dm_bio_prison_init_v1(void)
432{
433	_cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0);
434	if (!_cell_cache)
435		return -ENOMEM;
436
437	return 0;
438}
439
440static void dm_bio_prison_exit_v1(void)
441{
442	kmem_cache_destroy(_cell_cache);
443	_cell_cache = NULL;
444}
445
446static int (*_inits[])(void) __initdata = {
447	dm_bio_prison_init_v1,
448	dm_bio_prison_init_v2,
449};
450
451static void (*_exits[])(void) = {
452	dm_bio_prison_exit_v1,
453	dm_bio_prison_exit_v2,
454};
455
456static int __init dm_bio_prison_init(void)
457{
458	const int count = ARRAY_SIZE(_inits);
459
460	int r, i;
461
462	for (i = 0; i < count; i++) {
463		r = _inits[i]();
464		if (r)
465			goto bad;
466	}
467
468	return 0;
469
470bad:
471	while (i--)
472		_exits[i]();
473
474	return r;
475}
476
477static void __exit dm_bio_prison_exit(void)
478{
479	int i = ARRAY_SIZE(_exits);
480
481	while (i--)
482		_exits[i]();
483}
484
485/*
486 * module hooks
487 */
488module_init(dm_bio_prison_init);
489module_exit(dm_bio_prison_exit);
490
491MODULE_DESCRIPTION(DM_NAME " bio prison");
492MODULE_AUTHOR("Joe Thornber <dm-devel@lists.linux.dev>");
493MODULE_LICENSE("GPL");
494