// SPDX-License-Identifier: GPL-2.0

#include <linux/objpool.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/atomic.h>
#include <linux/irqflags.h>
#include <linux/cpumask.h>
#include <linux/log2.h>

/*
 * objpool: ring-array based lockless MPMC/FIFO queues
 *
 * Copyright: wuqiang.matt@bytedance.com,mhiramat@kernel.org
 */

/* initialize percpu objpool_slot */
static int
objpool_init_percpu_slot(struct objpool_head *pool,
			 struct objpool_slot *slot,
			 int nodes, void *context,
			 objpool_init_obj_cb objinit)
{
	void *obj = (void *)&slot->entries[pool->capacity];
	int i;

	/* initialize elements of percpu objpool_slot */
	slot->mask = pool->capacity - 1;

	for (i = 0; i < nodes; i++) {
		if (objinit) {
			int rc = objinit(obj, context);

			if (rc)
				return rc;
		}
		slot->entries[slot->tail & slot->mask] = obj;
		obj = obj + pool->obj_size;
		slot->tail++;
		slot->last = slot->tail;
		pool->nr_objs++;
	}

	return 0;
}

/* allocate and initialize percpu slots */
static int
objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs,
			  void *context, objpool_init_obj_cb objinit)
{
	int i, cpu_count = 0;

	for (i = 0; i < pool->nr_cpus; i++) {

		struct objpool_slot *slot;
		int nodes, size, rc;

		/* skip CPUs that can never be present */
		if (!cpu_possible(i))
			continue;

		/* compute how many objects are to be allocated for this slot */
		nodes = nr_objs / num_possible_cpus();
		if (cpu_count < (nr_objs % num_possible_cpus()))
			nodes++;
		cpu_count++;

		size = struct_size(slot, entries, pool->capacity) +
			pool->obj_size * nodes;

		/*
		 * here the percpu slot and its objects are allocated together
		 * in a single allocation to make it more compact, taking
		 * advantage of warm caches and TLB hits. by default vmalloc
		 * is used to reduce the pressure on the kernel slab system;
		 * note that the minimal size of a vmalloc allocation is one
		 * page, since vmalloc always rounds the requested size up to
		 * page granularity
		 */
		if (pool->gfp & GFP_ATOMIC)
			slot = kmalloc_node(size, pool->gfp, cpu_to_node(i));
		else
			slot = __vmalloc_node(size, sizeof(void *), pool->gfp,
				cpu_to_node(i), __builtin_return_address(0));
		if (!slot)
			return -ENOMEM;
		memset(slot, 0, size);
		pool->cpu_slots[i] = slot;

		/* initialize the objpool_slot of cpu node i */
		rc = objpool_init_percpu_slot(pool, slot, nodes, context, objinit);
		if (rc)
			return rc;
	}

	return 0;
}

/* cleanup all percpu slots of the object pool */
static void objpool_fini_percpu_slots(struct objpool_head *pool)
{
	int i;

	if (!pool->cpu_slots)
		return;

	for (i = 0; i < pool->nr_cpus; i++)
		kvfree(pool->cpu_slots[i]);
	kfree(pool->cpu_slots);
}

/* initialize object pool and pre-allocate objects */
int objpool_init(struct objpool_head *pool, int nr_objs, int object_size,
		gfp_t gfp, void *context, objpool_init_obj_cb objinit,
		objpool_fini_cb release)
{
	int rc, capacity, slot_size;

	/* check input parameters */
	if (nr_objs <= 0 || nr_objs > OBJPOOL_NR_OBJECT_MAX ||
	    object_size <= 0 || object_size > OBJPOOL_OBJECT_SIZE_MAX)
		return -EINVAL;

	/* align up to unsigned long size */
	object_size = ALIGN(object_size, sizeof(long));

	/* calculate capacity of percpu objpool_slot */
	capacity = roundup_pow_of_two(nr_objs);
	if (!capacity)
		return -EINVAL;

	/* initialize the objpool head */
	memset(pool, 0, sizeof(struct objpool_head));
	pool->nr_cpus = nr_cpu_ids;
	pool->obj_size = object_size;
	pool->capacity = capacity;
	pool->gfp = gfp & ~__GFP_ZERO;
	pool->context = context;
	pool->release = release;
	slot_size = pool->nr_cpus * sizeof(struct objpool_slot);
	pool->cpu_slots = kzalloc(slot_size, pool->gfp);
	if (!pool->cpu_slots)
		return -ENOMEM;

	/* initialize per-cpu slots */
	rc = objpool_init_percpu_slots(pool, nr_objs, context, objinit);
	if (rc)
		objpool_fini_percpu_slots(pool);
	else
		refcount_set(&pool->ref, pool->nr_objs + 1);

	return rc;
}
EXPORT_SYMBOL_GPL(objpool_init);
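
/*
 * Illustrative usage sketch (not part of this file): a hypothetical
 * caller pre-allocating 128 'my_node' objects at module init time.
 * The names 'my_node', 'my_node_init', 'my_pool' and 'my_module_init'
 * are made up for this example only.
 *
 *	struct my_node {
 *		unsigned long data;
 *	};
 *
 *	static struct objpool_head my_pool;
 *
 *	static int my_node_init(void *obj, void *context)
 *	{
 *		struct my_node *node = obj;
 *
 *		node->data = 0;
 *		return 0;
 *	}
 *
 *	static int __init my_module_init(void)
 *	{
 *		return objpool_init(&my_pool, 128, sizeof(struct my_node),
 *				    GFP_KERNEL, NULL, my_node_init, NULL);
 *	}
 */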

/* adding object to slot, abort if the slot was already full */
static inline int
objpool_try_add_slot(void *obj, struct objpool_head *pool, int cpu)
{
	struct objpool_slot *slot = pool->cpu_slots[cpu];
	uint32_t head, tail;

	/* loading tail and head as a local snapshot, tail first */
	tail = READ_ONCE(slot->tail);

	do {
		head = READ_ONCE(slot->head);
		/* fault caught: something must be wrong */
		WARN_ON_ONCE(tail - head > pool->nr_objs);
	} while (!try_cmpxchg_acquire(&slot->tail, &tail, tail + 1));

	/* now the tail position is reserved for the given obj */
	WRITE_ONCE(slot->entries[tail & slot->mask], obj);
	/* update sequence to make this obj available for pop() */
	smp_store_release(&slot->last, tail + 1);

	return 0;
}

/* reclaim an object back to the object pool */
int objpool_push(void *obj, struct objpool_head *pool)
{
	unsigned long flags;
	int rc;

	/* disable local irq to avoid preemption & interruption */
	raw_local_irq_save(flags);
	rc = objpool_try_add_slot(obj, pool, raw_smp_processor_id());
	raw_local_irq_restore(flags);

	return rc;
}
EXPORT_SYMBOL_GPL(objpool_push);

/* try to retrieve object from slot */
static inline void *objpool_try_get_slot(struct objpool_head *pool, int cpu)
{
	struct objpool_slot *slot = pool->cpu_slots[cpu];
	/* load head snapshot, other cpus may change it */
	uint32_t head = smp_load_acquire(&slot->head);

	while (head != READ_ONCE(slot->last)) {
		void *obj;

		/*
		 * data visibility of 'last' and 'head' can be out of
		 * order, since the memory updates of 'last' and 'head'
		 * are performed independently in push() and pop()
		 *
		 * before attempting any retrieval, pop() must make sure
		 * there really are objects available in the slot, i.e.
		 * 'last' is ahead of 'head', which can be ensured by the
		 * condition 'last != head && last - head <= nr_objs'.
		 * since 'last' and 'head' are both unsigned 32-bit
		 * values, this is equivalent to 'last - head - 1 < nr_objs'
		 */
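		/*
		 * worked example of the wrap-around arithmetic below:
		 * with nr_objs == 16 and a stale snapshot where
		 * last == head, 'last - head - 1' wraps to U32_MAX,
		 * which is >= nr_objs, so head is reloaded and the
		 * loop condition is re-checked instead of consuming
		 * a possibly bogus entry
		 */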
		if (READ_ONCE(slot->last) - head - 1 >= pool->nr_objs) {
			head = READ_ONCE(slot->head);
			continue;
		}

		/* obj must be retrieved before moving head forward */
		obj = READ_ONCE(slot->entries[head & slot->mask]);

		/* move head forward to mark its consumption */
		if (try_cmpxchg_release(&slot->head, &head, head + 1))
			return obj;
	}

	return NULL;
}

/* allocate an object from the object pool */
void *objpool_pop(struct objpool_head *pool)
{
	void *obj = NULL;
	unsigned long flags;
	int i, cpu;

	/* disable local irq to avoid preemption & interruption */
	raw_local_irq_save(flags);

	cpu = raw_smp_processor_id();
	for (i = 0; i < num_possible_cpus(); i++) {
		obj = objpool_try_get_slot(pool, cpu);
		if (obj)
			break;
		cpu = cpumask_next_wrap(cpu, cpu_possible_mask, -1, 1);
	}
	raw_local_irq_restore(flags);

	return obj;
}
EXPORT_SYMBOL_GPL(objpool_pop);
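
/*
 * Illustrative usage sketch (hypothetical, reusing the example names
 * introduced above): a caller borrows an object with objpool_pop(),
 * which may return NULL when all objects are currently in use, and
 * hands it back with objpool_push() once done.
 *
 *	struct my_node *node = objpool_pop(&my_pool);
 *
 *	if (!node)
 *		return -ENOMEM;
 *
 *	node->data = 42;
 *	...
 *	objpool_push(node, &my_pool);
 */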

/* release the whole objpool forcibly */
void objpool_free(struct objpool_head *pool)
{
	if (!pool->cpu_slots)
		return;

	/* release percpu slots */
	objpool_fini_percpu_slots(pool);

	/* call user's cleanup callback if provided */
	if (pool->release)
		pool->release(pool, pool->context);
}
EXPORT_SYMBOL_GPL(objpool_free);

/* drop an allocated object instead of reclaiming it to the objpool */
int objpool_drop(void *obj, struct objpool_head *pool)
{
	if (!obj || !pool)
		return -EINVAL;

	if (refcount_dec_and_test(&pool->ref)) {
		objpool_free(pool);
		return 0;
	}

	return -EAGAIN;
}
EXPORT_SYMBOL_GPL(objpool_drop);

/* drop unused objects and dec-ref the objpool to release it */
void objpool_fini(struct objpool_head *pool)
{
	int count = 1; /* extra ref for objpool itself */

	/* drop all remaining objects from the objpool */
	while (objpool_pop(pool))
		count++;

	if (refcount_sub_and_test(count, &pool->ref))
		objpool_free(pool);
}
EXPORT_SYMBOL_GPL(objpool_fini);
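
/*
 * Illustrative teardown sketch (hypothetical, reusing the example names
 * above): the pool owner calls objpool_fini(), which pops every object
 * still sitting in the pool and drops those references together with
 * the pool's own extra reference; objects still in flight are released
 * individually via objpool_drop(), and whichever call drops the last
 * reference ends up in objpool_free() and the user's release() callback.
 *
 *	static void __exit my_module_exit(void)
 *	{
 *		objpool_fini(&my_pool);
 *	}
 *
 *	and, on a path that still owns an object it no longer needs:
 *
 *	objpool_drop(node, &my_pool);
 */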