/*-
 * Copyright (c) 2010 Isilon Systems, Inc.
 * Copyright (c) 2010 iX Systems, Inc.
 * Copyright (c) 2010 Panasas, Inc.
 * Copyright (c) 2013-2017 Mellanox Technologies, Ltd.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/mutex.h>

#include <machine/stdarg.h>

#include <linux/bitmap.h>
#include <linux/kobject.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/err.h>

#define	MAX_IDR_LEVEL	((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS)
#define	MAX_IDR_FREE	(MAX_IDR_LEVEL * 2)

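/*
 * Per-CPU cache of preallocated "struct idr_layer" entries, filled by
 * idr_preload() and consumed by idr_get() when other allocation paths fail.
 * The entries are linked through ary[0] and protected by the per-CPU
 * spinlock.
 */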
struct linux_idr_cache {
	spinlock_t lock;
	struct idr_layer *head;
	unsigned count;
};

DPCPU_DEFINE_STATIC(struct linux_idr_cache, linux_idr_cache);

/*
 * IDR Implementation.
 *
 * This is quick and dirty and not as re-entrant as the Linux version;
 * however, it should be fairly fast.  It is basically a radix tree with
 * a built-in bitmap for allocation; a set bit in a layer's bitmap means
 * the corresponding slot is free.
 */
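
/*
 * A minimal usage sketch of the API implemented below (consumer-side code,
 * not part of this file):
 *
 *	struct idr map;
 *	void *item, *found;
 *	int id;
 *
 *	idr_init(&map);
 *	id = idr_alloc(&map, item, 0, 0, GFP_KERNEL);
 *	if (id >= 0)
 *		found = idr_find(&map, id);
 *	idr_remove(&map, id);
 *	idr_destroy(&map);
 */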
static MALLOC_DEFINE(M_IDR, "idr", "Linux IDR compat");

static struct idr_layer *
idr_preload_dequeue_locked(struct linux_idr_cache *lic)
{
	struct idr_layer *retval;

	/* check if wrong thread is trying to dequeue */
	if (mtx_owned(&lic->lock.m) == 0)
		return (NULL);

	retval = lic->head;
	if (likely(retval != NULL)) {
		lic->head = retval->ary[0];
		lic->count--;
		retval->ary[0] = NULL;
	}
	return (retval);
}

static void
idr_preload_init(void *arg)
{
	int cpu;

	CPU_FOREACH(cpu) {
		struct linux_idr_cache *lic =
		    DPCPU_ID_PTR(cpu, linux_idr_cache);

		spin_lock_init(&lic->lock);
	}
}
SYSINIT(idr_preload_init, SI_SUB_CPU, SI_ORDER_ANY, idr_preload_init, NULL);

static void
idr_preload_uninit(void *arg)
{
	int cpu;

	CPU_FOREACH(cpu) {
		struct idr_layer *cacheval;
		struct linux_idr_cache *lic =
		    DPCPU_ID_PTR(cpu, linux_idr_cache);

		while (1) {
			spin_lock(&lic->lock);
			cacheval = idr_preload_dequeue_locked(lic);
			spin_unlock(&lic->lock);

			if (cacheval == NULL)
				break;
			free(cacheval, M_IDR);
		}
		spin_lock_destroy(&lic->lock);
	}
}
SYSUNINIT(idr_preload_uninit, SI_SUB_LOCK, SI_ORDER_FIRST, idr_preload_uninit, NULL);

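/*
 * The idr_preload() and idr_preload_end() pair pins the current thread to
 * its CPU and fills the per-CPU layer cache.  The cache spinlock is left
 * held on return so that idr_get() can dequeue the preloaded layers (see
 * idr_preload_dequeue_locked()); idr_preload_end() releases the lock and
 * unpins the thread.
 */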
void
idr_preload(gfp_t gfp_mask)
{
	struct linux_idr_cache *lic;
	struct idr_layer *cacheval;

	sched_pin();

	lic = &DPCPU_GET(linux_idr_cache);

	/* fill up cache */
	spin_lock(&lic->lock);
	while (lic->count < MAX_IDR_FREE) {
		spin_unlock(&lic->lock);
		cacheval = malloc(sizeof(*cacheval), M_IDR, M_ZERO | gfp_mask);
		spin_lock(&lic->lock);
		if (cacheval == NULL)
			break;
		cacheval->ary[0] = lic->head;
		lic->head = cacheval;
		lic->count++;
	}
}

void
idr_preload_end(void)
{
	struct linux_idr_cache *lic;

	lic = &DPCPU_GET(linux_idr_cache);
	spin_unlock(&lic->lock);
	sched_unpin();
}

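/*
 * Each layer of the radix tree resolves IDR_BITS bits of the id: idr_max()
 * is the largest id representable with the current number of layers and
 * idr_pos() extracts the per-layer array index for a given id.
 */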
static inline int
idr_max(struct idr *idr)
{
	return (1 << (idr->layers * IDR_BITS)) - 1;
}

static inline int
idr_pos(int id, int layer)
{
	return (id >> (IDR_BITS * layer)) & IDR_MASK;
}

void
idr_init(struct idr *idr)
{
	bzero(idr, sizeof(*idr));
	mtx_init(&idr->lock, "idr", NULL, MTX_DEF);
}

/* Only frees cached pages. */
void
idr_destroy(struct idr *idr)
{
	struct idr_layer *il, *iln;

	idr_remove_all(idr);
	mtx_lock(&idr->lock);
	for (il = idr->free; il != NULL; il = iln) {
		iln = il->ary[0];
		free(il, M_IDR);
	}
	mtx_unlock(&idr->lock);
	mtx_destroy(&idr->lock);
}

static void
idr_remove_layer(struct idr_layer *il, int layer)
{
	int i;

	if (il == NULL)
		return;
	if (layer == 0) {
		free(il, M_IDR);
		return;
	}
	for (i = 0; i < IDR_SIZE; i++)
		if (il->ary[i])
			idr_remove_layer(il->ary[i], layer - 1);
	/* Free this interior layer as well, or it is leaked. */
	free(il, M_IDR);
}

void
idr_remove_all(struct idr *idr)
{

	mtx_lock(&idr->lock);
	idr_remove_layer(idr->top, idr->layers - 1);
	idr->top = NULL;
	idr->layers = 0;
	mtx_unlock(&idr->lock);
}

static void *
idr_remove_locked(struct idr *idr, int id)
{
	struct idr_layer *il;
	void *res;
	int layer;
	int idx;

	id &= MAX_ID_MASK;
	il = idr->top;
	layer = idr->layers - 1;
	if (il == NULL || id > idr_max(idr))
		return (NULL);
	/*
	 * Walk down the tree to this item setting bitmaps along the way
	 * as we know at least one item will be free along this path.
	 */
	while (layer && il) {
		idx = idr_pos(id, layer);
		il->bitmap |= 1 << idx;
		il = il->ary[idx];
		layer--;
	}
	idx = id & IDR_MASK;
	/*
	 * At this point we've set free space bitmaps up the whole tree.
	 * We could make this non-fatal and unwind but linux dumps a stack
	 * and a warning so I don't think it's necessary.
	 */
	if (il == NULL || (il->bitmap & (1 << idx)) != 0)
		panic("idr_remove: Item %d not allocated (%p, %p)\n",
		    id, idr, il);
	res = il->ary[idx];
	il->ary[idx] = NULL;
	il->bitmap |= 1 << idx;

	return (res);
}

void *
idr_remove(struct idr *idr, int id)
{
	void *res;

	mtx_lock(&idr->lock);
	res = idr_remove_locked(idr, id);
	mtx_unlock(&idr->lock);

	return (res);
}

static inline struct idr_layer *
idr_find_layer_locked(struct idr *idr, int id)
{
	struct idr_layer *il;
	int layer;

	id &= MAX_ID_MASK;
	il = idr->top;
	layer = idr->layers - 1;
	if (il == NULL || id > idr_max(idr))
		return (NULL);
	while (layer && il) {
		il = il->ary[idr_pos(id, layer)];
		layer--;
	}
	return (il);
}

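/*
 * Exchange the pointer stored at an already allocated id.  Returns the
 * previous pointer, or ERR_PTR(-ENOENT) when the id is not allocated.
 */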
void *
idr_replace(struct idr *idr, void *ptr, int id)
{
	struct idr_layer *il;
	void *res;
	int idx;

	mtx_lock(&idr->lock);
	il = idr_find_layer_locked(idr, id);
	idx = id & IDR_MASK;

	/* Replace still returns an error if the item was not allocated. */
	if (il == NULL || (il->bitmap & (1 << idx))) {
		res = ERR_PTR(-ENOENT);
	} else {
		res = il->ary[idx];
		il->ary[idx] = ptr;
	}
	mtx_unlock(&idr->lock);
	return (res);
}

static inline void *
idr_find_locked(struct idr *idr, int id)
{
	struct idr_layer *il;
	void *res;

	mtx_assert(&idr->lock, MA_OWNED);
	il = idr_find_layer_locked(idr, id);
	if (il != NULL)
		res = il->ary[id & IDR_MASK];
	else
		res = NULL;
	return (res);
}

void *
idr_find(struct idr *idr, int id)
{
	void *res;

	mtx_lock(&idr->lock);
	res = idr_find_locked(idr, id);
	mtx_unlock(&idr->lock);
	return (res);
}

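/*
 * Return the first allocated entry with an id at or above *nextidp, storing
 * its id in *nextidp, or return NULL when no such entry exists.
 */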
void *
idr_get_next(struct idr *idr, int *nextidp)
{
	void *res = NULL;
	int id = *nextidp;

	mtx_lock(&idr->lock);
	for (; id <= idr_max(idr); id++) {
		res = idr_find_locked(idr, id);
		if (res == NULL)
			continue;
		*nextidp = id;
		break;
	}
	mtx_unlock(&idr->lock);
	return (res);
}

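/*
 * Legacy preallocation interface: stock the idr's private free list with
 * enough layers (idr->layers + 1) for a following idr_get_new() to succeed
 * without allocating.  Returns 1 on success, 0 on allocation failure.
 */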
int
idr_pre_get(struct idr *idr, gfp_t gfp_mask)
{
	struct idr_layer *il, *iln;
	struct idr_layer *head;
	int need;

	mtx_lock(&idr->lock);
	for (;;) {
		need = idr->layers + 1;
		for (il = idr->free; il != NULL; il = il->ary[0])
			need--;
		mtx_unlock(&idr->lock);
		if (need <= 0)
			break;
		for (head = NULL; need; need--) {
			iln = malloc(sizeof(*il), M_IDR, M_ZERO | gfp_mask);
			if (iln == NULL)
				break;
			bitmap_fill(&iln->bitmap, IDR_SIZE);
			if (head != NULL) {
				il->ary[0] = iln;
				il = iln;
			} else
				head = il = iln;
		}
		if (head == NULL)
			return (0);
		mtx_lock(&idr->lock);
		il->ary[0] = idr->free;
		idr->free = head;
	}
	return (1);
}

static struct idr_layer *
idr_free_list_get(struct idr *idp)
{
	struct idr_layer *il;

	if ((il = idp->free) != NULL) {
		idp->free = il->ary[0];
		il->ary[0] = NULL;
	}
	return (il);
}

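/*
 * Get a free layer, trying the idr's own free list first, then an M_NOWAIT
 * allocation, and finally the per-CPU preload cache (which only yields
 * layers between idr_preload() and idr_preload_end() on the current CPU).
 */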
static inline struct idr_layer *
idr_get(struct idr *idp)
{
	struct idr_layer *il;

	if ((il = idr_free_list_get(idp)) != NULL) {
		MPASS(il->bitmap != 0);
	} else if ((il = malloc(sizeof(*il), M_IDR, M_ZERO | M_NOWAIT)) != NULL) {
		bitmap_fill(&il->bitmap, IDR_SIZE);
	} else if ((il = idr_preload_dequeue_locked(&DPCPU_GET(linux_idr_cache))) != NULL) {
		bitmap_fill(&il->bitmap, IDR_SIZE);
	} else {
		return (NULL);
	}
	return (il);
}

/*
 * Could be implemented as get_new_above(idr, ptr, 0, idp) but kept
 * separate for simplicity's sake.
 */
static int
idr_get_new_locked(struct idr *idr, void *ptr, int *idp)
{
	struct idr_layer *stack[MAX_LEVEL];
	struct idr_layer *il;
	int error;
	int layer;
	int idx;
	int id;

	mtx_assert(&idr->lock, MA_OWNED);

	error = -EAGAIN;
	/*
	 * Expand the tree until there is free space.
	 */
	if (idr->top == NULL || idr->top->bitmap == 0) {
		if (idr->layers == MAX_LEVEL + 1) {
			error = -ENOSPC;
			goto out;
		}
		il = idr_get(idr);
		if (il == NULL)
			goto out;
		il->ary[0] = idr->top;
		if (idr->top)
			il->bitmap &= ~1;
		idr->top = il;
		idr->layers++;
	}
	il = idr->top;
	id = 0;
	/*
	 * Walk the tree following free bitmaps, record our path.
	 */
	for (layer = idr->layers - 1;; layer--) {
		stack[layer] = il;
		idx = ffsl(il->bitmap);
		if (idx == 0)
			panic("idr_get_new: Invalid leaf state (%p, %p)\n",
			    idr, il);
		idx--;
		id |= idx << (layer * IDR_BITS);
		if (layer == 0)
			break;
		if (il->ary[idx] == NULL) {
			il->ary[idx] = idr_get(idr);
			if (il->ary[idx] == NULL)
				goto out;
		}
		il = il->ary[idx];
	}
	/*
	 * Allocate the leaf to the consumer.
	 */
	il->bitmap &= ~(1 << idx);
	il->ary[idx] = ptr;
	*idp = id;
	/*
	 * Clear bitmaps potentially up to the root.
	 */
	while (il->bitmap == 0 && ++layer < idr->layers) {
		il = stack[layer];
		il->bitmap &= ~(1 << idr_pos(id, layer));
	}
	error = 0;
out:
#ifdef INVARIANTS
	if (error == 0 && idr_find_locked(idr, id) != ptr) {
		panic("idr_get_new: Failed for idr %p, id %d, ptr %p\n",
		    idr, id, ptr);
	}
#endif
	return (error);
}

int
idr_get_new(struct idr *idr, void *ptr, int *idp)
{
	int retval;

	mtx_lock(&idr->lock);
	retval = idr_get_new_locked(idr, ptr, idp);
	mtx_unlock(&idr->lock);
	return (retval);
}

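/*
 * Allocate a free id at or above starting_id.  The walk may descend into a
 * subtree whose only free slots lie below the requested position, in which
 * case the search restarts with a larger starting id.
 */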
static int
idr_get_new_above_locked(struct idr *idr, void *ptr, int starting_id, int *idp)
{
	struct idr_layer *stack[MAX_LEVEL];
	struct idr_layer *il;
	int error;
	int layer;
	int idx, sidx;
	int id;

	mtx_assert(&idr->lock, MA_OWNED);

	error = -EAGAIN;
	/*
	 * Compute the layers required to support starting_id and the mask
	 * at the top layer.
	 */
restart:
	idx = starting_id;
	layer = 0;
	while (idx & ~IDR_MASK) {
		layer++;
		idx >>= IDR_BITS;
	}
	if (layer == MAX_LEVEL + 1) {
		error = -ENOSPC;
		goto out;
	}
	/*
	 * Expand the tree until there is free space at or beyond starting_id.
	 */
	while (idr->layers <= layer ||
	    idr->top->bitmap < (1 << idr_pos(starting_id, idr->layers - 1))) {
		if (idr->layers == MAX_LEVEL + 1) {
			error = -ENOSPC;
			goto out;
		}
		il = idr_get(idr);
		if (il == NULL)
			goto out;
		il->ary[0] = idr->top;
		if (idr->top && idr->top->bitmap == 0)
			il->bitmap &= ~1;
		idr->top = il;
		idr->layers++;
	}
	il = idr->top;
	id = 0;
	/*
	 * Walk the tree following free bitmaps, record our path.
	 */
	for (layer = idr->layers - 1;; layer--) {
		stack[layer] = il;
		sidx = idr_pos(starting_id, layer);
		/* Returns the next set (free) bit index from sidx, or IDR_SIZE if none exists. */
		idx = find_next_bit(&il->bitmap, IDR_SIZE, sidx);
		if (idx == IDR_SIZE && sidx == 0)
			panic("idr_get_new: Invalid leaf state (%p, %p)\n",
			    idr, il);
		/*
		 * We may have walked a path where there was a free bit but
		 * it was lower than what we wanted.  Restart the search with
		 * a larger starting id.  id contains the progress we made so
		 * far.  Search the leaf one above this level.  This may
		 * restart as many as MAX_LEVEL times but that is expected
		 * to be rare.
		 */
		if (idx == IDR_SIZE) {
			starting_id = id + (1 << ((layer + 1) * IDR_BITS));
			goto restart;
		}
		if (idx > sidx)
			starting_id = 0;	/* Search the whole subtree. */
		id |= idx << (layer * IDR_BITS);
		if (layer == 0)
			break;
		if (il->ary[idx] == NULL) {
			il->ary[idx] = idr_get(idr);
			if (il->ary[idx] == NULL)
				goto out;
		}
		il = il->ary[idx];
	}
	/*
	 * Allocate the leaf to the consumer.
	 */
	il->bitmap &= ~(1 << idx);
	il->ary[idx] = ptr;
	*idp = id;
	/*
	 * Clear bitmaps potentially up to the root.
	 */
	while (il->bitmap == 0 && ++layer < idr->layers) {
		il = stack[layer];
		il->bitmap &= ~(1 << idr_pos(id, layer));
	}
	error = 0;
out:
#ifdef INVARIANTS
	if (error == 0 && idr_find_locked(idr, id) != ptr) {
		panic("idr_get_new_above: Failed for idr %p, id %d, ptr %p\n",
		    idr, id, ptr);
	}
#endif
	return (error);
}

int
idr_get_new_above(struct idr *idr, void *ptr, int starting_id, int *idp)
{
	int retval;

	mtx_lock(&idr->lock);
	retval = idr_get_new_above_locked(idr, ptr, starting_id, idp);
	mtx_unlock(&idr->lock);
	return (retval);
}

int
ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
{
	return (idr_get_new_above(&ida->idr, NULL, starting_id, p_id));
}

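/*
 * Common helper for idr_alloc() and idr_alloc_cyclic(): returns the new id
 * on success or a negative errno on failure.  The range is [start, end);
 * an "end" of zero or less means there is no upper limit.
 */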
static int
idr_alloc_locked(struct idr *idr, void *ptr, int start, int end)
{
	int max = end > 0 ? end - 1 : INT_MAX;
	int error;
	int id;

	mtx_assert(&idr->lock, MA_OWNED);

	if (unlikely(start < 0))
		return (-EINVAL);
	if (unlikely(max < start))
		return (-ENOSPC);

	if (start == 0)
		error = idr_get_new_locked(idr, ptr, &id);
	else
		error = idr_get_new_above_locked(idr, ptr, start, &id);

	if (unlikely(error < 0))
		return (error);
	if (unlikely(id > max)) {
		idr_remove_locked(idr, id);
		return (-ENOSPC);
	}
	return (id);
}

int
idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask)
{
	int retval;

	mtx_lock(&idr->lock);
	retval = idr_alloc_locked(idr, ptr, start, end);
	mtx_unlock(&idr->lock);
	return (retval);
}

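/*
 * Cyclic variant of idr_alloc(): start searching at the id following the
 * last allocation and fall back to "start" once the upper end of the range
 * is reached.
 */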
int
idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask)
{
	int retval;

	mtx_lock(&idr->lock);
	retval = idr_alloc_locked(idr, ptr, max(start, idr->next_cyclic_id), end);
	if (unlikely(retval == -ENOSPC))
		retval = idr_alloc_locked(idr, ptr, start, end);
	if (likely(retval >= 0))
		idr->next_cyclic_id = retval + 1;
	mtx_unlock(&idr->lock);
	return (retval);
}

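/*
 * Recursive helper for idr_for_each(): "offset" accumulates the id bits
 * decided by the layers above, so the callback receives the full id of
 * each leaf entry.
 */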
static int
idr_for_each_layer(struct idr_layer *il, int offset, int layer,
    int (*f)(int id, void *p, void *data), void *data)
{
	int i, err;

	if (il == NULL)
		return (0);
	if (layer == 0) {
		for (i = 0; i < IDR_SIZE; i++) {
			if (il->ary[i] == NULL)
				continue;
			err = f(i + offset, il->ary[i], data);
			if (err)
				return (err);
		}
		return (0);
	}
	for (i = 0; i < IDR_SIZE; i++) {
		if (il->ary[i] == NULL)
			continue;
		err = idr_for_each_layer(il->ary[i],
		    (i + offset) * IDR_SIZE, layer - 1, f, data);
		if (err)
			return (err);
	}
	return (0);
}

/* NOTE: It is not allowed to modify the IDR tree while it is being iterated */
int
idr_for_each(struct idr *idp, int (*f)(int id, void *p, void *data), void *data)
{
	return (idr_for_each_layer(idp->top, 0, idp->layers - 1, f, data));
}

static int
idr_has_entry(int id, void *p, void *data)
{

	return (1);
}

bool
idr_is_empty(struct idr *idp)
{

	return (idr_for_each(idp, idr_has_entry, NULL) == 0);
}

int
ida_pre_get(struct ida *ida, gfp_t flags)
{
	if (idr_pre_get(&ida->idr, flags) == 0)
		return (0);

	if (ida->free_bitmap == NULL) {
		ida->free_bitmap =
		    malloc(sizeof(struct ida_bitmap), M_IDR, flags);
	}
	return (ida->free_bitmap != NULL);
}

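/*
 * Allocate an id in the range [start, end); an "end" of zero means there is
 * no upper limit.  Returns the id on success or a negative errno on failure.
 */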
int
ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
    gfp_t flags)
{
	int ret, id;
	unsigned int max;

	MPASS((int)start >= 0);
	MPASS((int)end >= 0);

	if (end == 0)
		max = 0x80000000;
	else {
		MPASS(end > start);
		max = end - 1;
	}
again:
	if (!ida_pre_get(ida, flags))
		return (-ENOMEM);

	if ((ret = ida_get_new_above(ida, start, &id)) == 0) {
		if (id > max) {
			ida_remove(ida, id);
			ret = -ENOSPC;
		} else {
			ret = id;
		}
	}
	if (__predict_false(ret == -EAGAIN))
		goto again;

	return (ret);
}

void
ida_simple_remove(struct ida *ida, unsigned int id)
{
	idr_remove(&ida->idr, id);
}

void
ida_remove(struct ida *ida, int id)
{
	idr_remove(&ida->idr, id);
}

void
ida_init(struct ida *ida)
{
	idr_init(&ida->idr);
}

void
ida_destroy(struct ida *ida)
{
	idr_destroy(&ida->idr);
	free(ida->free_bitmap, M_IDR);
}