// SPDX-License-Identifier: GPL-2.0-only

#define pr_fmt(fmt)	"rtas-work-area: " fmt

#include <linux/genalloc.h>
#include <linux/log2.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mempool.h>
#include <linux/minmax.h>
#include <linux/mutex.h>
#include <linux/numa.h>
#include <linux/sizes.h>
#include <linux/wait.h>

#include <asm/machdep.h>
#include <asm/rtas-work-area.h>
#include <asm/rtas.h>

enum {
	/*
	 * Ensure the pool is page-aligned.
	 */
	RTAS_WORK_AREA_ARENA_ALIGN = PAGE_SIZE,
	/*
	 * Don't let a single allocation claim the whole arena.
	 */
	RTAS_WORK_AREA_ARENA_SZ = RTAS_WORK_AREA_MAX_ALLOC_SZ * 2,
	/*
	 * The smallest known work area size is for ibm,get-vpd's
	 * location code argument, which is limited to 79 characters
	 * plus 1 nul terminator.
	 *
	 * PAPR+ 7.3.20 ibm,get-vpd RTAS Call
	 * PAPR+ 12.3.2.4 Converged Location Code Rules - Length Restrictions
	 */
	RTAS_WORK_AREA_MIN_ALLOC_SZ = roundup_pow_of_two(80),
};
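
/*
 * For concreteness: with RTAS_WORK_AREA_MAX_ALLOC_SZ = SZ_128K (its
 * value in asm/rtas-work-area.h at the time of writing), the arena is
 * 256KB, and allocation sizes range from 128 bytes
 * (roundup_pow_of_two(80)) up to 128KB.
 */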

static struct {
	struct gen_pool *gen_pool;	/* allocator over the arena */
	char *arena;			/* memblock-reserved backing buffer */
	struct mutex mutex;		/* serializes allocations */
	struct wait_queue_head wqh;	/* alloc waiters, woken on free */
	mempool_t descriptor_pool;	/* work area descriptors */
	bool available;			/* arena and pools initialized? */
} rwa_state = {
	.mutex = __MUTEX_INITIALIZER(rwa_state.mutex),
	.wqh = __WAIT_QUEUE_HEAD_INITIALIZER(rwa_state.wqh),
};

/*
 * A single work area buffer and descriptor to serve requests early in
 * boot before the allocator is fully initialized. We know 4KB is the
 * most any boot time user needs (they all call ibm,get-system-parameter).
 */
static bool early_work_area_in_use __initdata;
static char early_work_area_buf[SZ_4K] __initdata __aligned(SZ_4K);
static struct rtas_work_area early_work_area __initdata = {
	.buf = early_work_area_buf,
	.size = sizeof(early_work_area_buf),
};

static struct rtas_work_area * __init rtas_work_area_alloc_early(size_t size)
{
	WARN_ON(size > early_work_area.size);
	WARN_ON(early_work_area_in_use);
	early_work_area_in_use = true;
	memset(early_work_area.buf, 0, early_work_area.size);
	return &early_work_area;
}

static void __init rtas_work_area_free_early(struct rtas_work_area *work_area)
{
	WARN_ON(work_area != &early_work_area);
	WARN_ON(!early_work_area_in_use);
	early_work_area_in_use = false;
}
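
/**
 * __rtas_work_area_alloc() - Acquire a work area from the arena.
 * @size: Allocation size in bytes; capped at RTAS_WORK_AREA_MAX_ALLOC_SZ.
 *
 * Use the rtas_work_area_alloc() wrapper instead of calling this
 * directly; it enforces the size limit at build time. May sleep, and
 * always succeeds: the caller blocks until a suitable region of the
 * arena becomes free.
 *
 * A typical caller looks something like the sketch below. The
 * accessors are declared in asm/rtas-work-area.h; the token and the
 * argument counts depend on the RTAS function being invoked.
 *
 *	struct rtas_work_area *work_area;
 *	int rc;
 *
 *	work_area = rtas_work_area_alloc(SZ_4K);
 *	rc = rtas_call(token, 2, 1, NULL,
 *		       rtas_work_area_phys(work_area),
 *		       rtas_work_area_size(work_area));
 *	rtas_work_area_free(work_area);
 *
 * Return: A work area descriptor; never NULL.
 */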
struct rtas_work_area * __ref __rtas_work_area_alloc(size_t size)
{
	struct rtas_work_area *area;
	unsigned long addr;

	might_sleep();

	/*
	 * The rtas_work_area_alloc() wrapper enforces this at build
	 * time. Requests that exceed the arena size will block
	 * indefinitely.
	 */
	WARN_ON(size > RTAS_WORK_AREA_MAX_ALLOC_SZ);

	if (!rwa_state.available)
		return rtas_work_area_alloc_early(size);
	/*
	 * To ensure FCFS behavior and prevent a high rate of smaller
	 * requests from starving larger ones, use the mutex to queue
	 * allocations.
	 */
	mutex_lock(&rwa_state.mutex);
	wait_event(rwa_state.wqh,
		   (addr = gen_pool_alloc(rwa_state.gen_pool, size)) != 0);
	mutex_unlock(&rwa_state.mutex);

	area = mempool_alloc(&rwa_state.descriptor_pool, GFP_KERNEL);
	area->buf = (char *)addr;
	area->size = size;

	return area;
}
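
/**
 * rtas_work_area_free() - Release a work area.
 * @area: Descriptor obtained from rtas_work_area_alloc().
 *
 * Returns @area's buffer to the arena and wakes any waiters blocked
 * in __rtas_work_area_alloc().
 */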
void __ref rtas_work_area_free(struct rtas_work_area *area)
{
	if (!rwa_state.available) {
		rtas_work_area_free_early(area);
		return;
	}

	gen_pool_free(rwa_state.gen_pool, (unsigned long)area->buf, area->size);
	mempool_free(area, &rwa_state.descriptor_pool);
	wake_up(&rwa_state.wqh);
}

/*
 * Initialization of the work area allocator happens in two parts. To
 * reliably reserve an arena that satisfies RTAS addressing
 * requirements, we must perform a memblock allocation early,
 * immediately after RTAS instantiation. Then we have to wait until
 * the slab allocator is up before setting up the descriptor mempool
 * and adding the arena to a gen_pool.
 */
static __init int rtas_work_area_allocator_init(void)
{
	const unsigned int order = ilog2(RTAS_WORK_AREA_MIN_ALLOC_SZ);
	const phys_addr_t pa_start = __pa(rwa_state.arena);
	const phys_addr_t pa_end = pa_start + RTAS_WORK_AREA_ARENA_SZ - 1;
	struct gen_pool *pool;
	const int nid = NUMA_NO_NODE;
	int err;

	err = -ENOMEM;
	if (!rwa_state.arena)
		goto err_out;

	pool = gen_pool_create(order, nid);
	if (!pool)
		goto err_out;
	/*
	 * All RTAS functions that consume work areas are OK with
	 * natural alignment, when they have alignment requirements at
	 * all. The order-align algorithm gives us that: e.g. a 4KB
	 * request is satisfied with a 4KB-aligned region.
	 */
	gen_pool_set_algo(pool, gen_pool_first_fit_order_align, NULL);

	err = gen_pool_add(pool, (unsigned long)rwa_state.arena,
			   RTAS_WORK_AREA_ARENA_SZ, nid);
	if (err)
		goto err_destroy;

	err = mempool_init_kmalloc_pool(&rwa_state.descriptor_pool, 1,
					sizeof(struct rtas_work_area));
	if (err)
		goto err_destroy;

	rwa_state.gen_pool = pool;
	rwa_state.available = true;

	pr_debug("arena [%pa-%pa] (%uK), min/max alloc sizes %u/%u\n",
		 &pa_start, &pa_end,
		 RTAS_WORK_AREA_ARENA_SZ / SZ_1K,
		 RTAS_WORK_AREA_MIN_ALLOC_SZ,
		 RTAS_WORK_AREA_MAX_ALLOC_SZ);

	return 0;

err_destroy:
	gen_pool_destroy(pool);
err_out:
	return err;
}
machine_arch_initcall(pseries, rtas_work_area_allocator_init);

/**
 * rtas_work_area_reserve_arena() - Reserve memory suitable for RTAS work areas.
 * @limit: Upper limit for the memblock allocation, chosen to satisfy
 *         RTAS addressing requirements.
 *
 * Called early in boot, immediately after RTAS instantiation; see the
 * comment above rtas_work_area_allocator_init().
 */
void __init rtas_work_area_reserve_arena(const phys_addr_t limit)
{
	const phys_addr_t align = RTAS_WORK_AREA_ARENA_ALIGN;
	const phys_addr_t size = RTAS_WORK_AREA_ARENA_SZ;
	const phys_addr_t min = MEMBLOCK_LOW_LIMIT;
	const int nid = NUMA_NO_NODE;

	/*
	 * Too early for a machine_is(pseries) check. But PAPR
	 * effectively mandates that ibm,get-system-parameter is
	 * present:
	 *
	 * R1–7.3.16–1. All platforms must support the System
	 * Parameters option.
	 *
	 * So set up the arena if we find that, with a fallback to
	 * ibm,configure-connector, just in case.
	 */
	if (rtas_function_implemented(RTAS_FN_IBM_GET_SYSTEM_PARAMETER) ||
	    rtas_function_implemented(RTAS_FN_IBM_CONFIGURE_CONNECTOR))
		rwa_state.arena = memblock_alloc_try_nid(size, align, min, limit, nid);
}