1// SPDX-License-Identifier: GPL-2.0-only
2/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
3#include <linux/memregion.h>
4#include <linux/genalloc.h>
5#include <linux/device.h>
6#include <linux/module.h>
7#include <linux/memory.h>
8#include <linux/slab.h>
9#include <linux/uuid.h>
10#include <linux/sort.h>
11#include <linux/idr.h>
12#include <cxlmem.h>
13#include <cxl.h>
14#include "core.h"
15
16/**
17 * DOC: cxl core region
18 *
19 * CXL Regions represent mapped memory capacity in system physical address
20 * space. Whereas the CXL Root Decoders identify the bounds of potential CXL
21 * Memory ranges, Regions represent the active mapped capacity by the HDM
22 * Decoder Capability structures throughout the Host Bridges, Switches, and
23 * Endpoints in the topology.
24 *
25 * Region configuration has ordering constraints. UUID may be set at any time
26 * but is only visible for persistent regions.
27 * 1. Interleave granularity
28 * 2. Interleave size
29 * 3. Decoder targets
30 */
31
32static struct cxl_region *to_cxl_region(struct device *dev);
33
34#define __ACCESS_ATTR_RO(_level, _name) {				\
35	.attr	= { .name = __stringify(_name), .mode = 0444 },		\
36	.show	= _name##_access##_level##_show,			\
37}
38
39#define ACCESS_DEVICE_ATTR_RO(level, name)	\
40	struct device_attribute dev_attr_access##level##_##name = __ACCESS_ATTR_RO(level, name)
41
42#define ACCESS_ATTR_RO(level, attrib)					      \
43static ssize_t attrib##_access##level##_show(struct device *dev,	      \
44					  struct device_attribute *attr,      \
45					  char *buf)			      \
46{									      \
47	struct cxl_region *cxlr = to_cxl_region(dev);			      \
48									      \
49	if (cxlr->coord[level].attrib == 0)				      \
50		return -ENOENT;						      \
51									      \
52	return sysfs_emit(buf, "%u\n", cxlr->coord[level].attrib);	      \
53}									      \
54static ACCESS_DEVICE_ATTR_RO(level, attrib)
55
56ACCESS_ATTR_RO(0, read_bandwidth);
57ACCESS_ATTR_RO(0, read_latency);
58ACCESS_ATTR_RO(0, write_bandwidth);
59ACCESS_ATTR_RO(0, write_latency);
60
61#define ACCESS_ATTR_DECLARE(level, attrib)	\
62	(&dev_attr_access##level##_##attrib.attr)
63
64static struct attribute *access0_coordinate_attrs[] = {
65	ACCESS_ATTR_DECLARE(0, read_bandwidth),
66	ACCESS_ATTR_DECLARE(0, write_bandwidth),
67	ACCESS_ATTR_DECLARE(0, read_latency),
68	ACCESS_ATTR_DECLARE(0, write_latency),
69	NULL
70};
71
72ACCESS_ATTR_RO(1, read_bandwidth);
73ACCESS_ATTR_RO(1, read_latency);
74ACCESS_ATTR_RO(1, write_bandwidth);
75ACCESS_ATTR_RO(1, write_latency);
76
77static struct attribute *access1_coordinate_attrs[] = {
78	ACCESS_ATTR_DECLARE(1, read_bandwidth),
79	ACCESS_ATTR_DECLARE(1, write_bandwidth),
80	ACCESS_ATTR_DECLARE(1, read_latency),
81	ACCESS_ATTR_DECLARE(1, write_latency),
82	NULL
83};
84
85#define ACCESS_VISIBLE(level)						\
86static umode_t cxl_region_access##level##_coordinate_visible(		\
87		struct kobject *kobj, struct attribute *a, int n)	\
88{									\
89	struct device *dev = kobj_to_dev(kobj);				\
90	struct cxl_region *cxlr = to_cxl_region(dev);			\
91									\
92	if (a == &dev_attr_access##level##_read_latency.attr &&		\
93	    cxlr->coord[level].read_latency == 0)			\
94		return 0;						\
95									\
96	if (a == &dev_attr_access##level##_write_latency.attr &&	\
97	    cxlr->coord[level].write_latency == 0)			\
98		return 0;						\
99									\
100	if (a == &dev_attr_access##level##_read_bandwidth.attr &&	\
101	    cxlr->coord[level].read_bandwidth == 0)			\
102		return 0;						\
103									\
104	if (a == &dev_attr_access##level##_write_bandwidth.attr &&	\
105	    cxlr->coord[level].write_bandwidth == 0)			\
106		return 0;						\
107									\
108	return a->mode;							\
109}
110
111ACCESS_VISIBLE(0);
112ACCESS_VISIBLE(1);
113
114static const struct attribute_group cxl_region_access0_coordinate_group = {
115	.name = "access0",
116	.attrs = access0_coordinate_attrs,
117	.is_visible = cxl_region_access0_coordinate_visible,
118};
119
120static const struct attribute_group *get_cxl_region_access0_group(void)
121{
122	return &cxl_region_access0_coordinate_group;
123}
124
125static const struct attribute_group cxl_region_access1_coordinate_group = {
126	.name = "access1",
127	.attrs = access1_coordinate_attrs,
128	.is_visible = cxl_region_access1_coordinate_visible,
129};
130
131static const struct attribute_group *get_cxl_region_access1_group(void)
132{
133	return &cxl_region_access1_coordinate_group;
134}
135
136static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
137			 char *buf)
138{
139	struct cxl_region *cxlr = to_cxl_region(dev);
140	struct cxl_region_params *p = &cxlr->params;
141	ssize_t rc;
142
143	rc = down_read_interruptible(&cxl_region_rwsem);
144	if (rc)
145		return rc;
146	if (cxlr->mode != CXL_DECODER_PMEM)
147		rc = sysfs_emit(buf, "\n");
148	else
149		rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
150	up_read(&cxl_region_rwsem);
151
152	return rc;
153}
154
155static int is_dup(struct device *match, void *data)
156{
157	struct cxl_region_params *p;
158	struct cxl_region *cxlr;
159	uuid_t *uuid = data;
160
161	if (!is_cxl_region(match))
162		return 0;
163
164	lockdep_assert_held(&cxl_region_rwsem);
165	cxlr = to_cxl_region(match);
166	p = &cxlr->params;
167
168	if (uuid_equal(&p->uuid, uuid)) {
169		dev_dbg(match, "already has uuid: %pUb\n", uuid);
170		return -EBUSY;
171	}
172
173	return 0;
174}
175
176static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
177			  const char *buf, size_t len)
178{
179	struct cxl_region *cxlr = to_cxl_region(dev);
180	struct cxl_region_params *p = &cxlr->params;
181	uuid_t temp;
182	ssize_t rc;
183
184	if (len != UUID_STRING_LEN + 1)
185		return -EINVAL;
186
187	rc = uuid_parse(buf, &temp);
188	if (rc)
189		return rc;
190
191	if (uuid_is_null(&temp))
192		return -EINVAL;
193
194	rc = down_write_killable(&cxl_region_rwsem);
195	if (rc)
196		return rc;
197
198	if (uuid_equal(&p->uuid, &temp))
199		goto out;
200
201	rc = -EBUSY;
202	if (p->state >= CXL_CONFIG_ACTIVE)
203		goto out;
204
205	rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
206	if (rc < 0)
207		goto out;
208
209	uuid_copy(&p->uuid, &temp);
210out:
211	up_write(&cxl_region_rwsem);
212
213	if (rc)
214		return rc;
215	return len;
216}
217static DEVICE_ATTR_RW(uuid);
218
219static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
220					  struct cxl_region *cxlr)
221{
222	return xa_load(&port->regions, (unsigned long)cxlr);
223}
224
225static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
226{
227	if (!cpu_cache_has_invalidate_memregion()) {
228		if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
229			dev_info_once(
230				&cxlr->dev,
231				"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
232			return 0;
233		} else {
234			dev_err(&cxlr->dev,
235				"Failed to synchronize CPU cache state\n");
236			return -ENXIO;
237		}
238	}
239
240	cpu_cache_invalidate_memregion(IORES_DESC_CXL);
241	return 0;
242}
243
244static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
245{
246	struct cxl_region_params *p = &cxlr->params;
247	int i, rc = 0;
248
249	/*
250	 * Before region teardown attempt to flush, and if the flush
251	 * fails cancel the region teardown for data consistency
252	 * concerns
253	 */
254	rc = cxl_region_invalidate_memregion(cxlr);
255	if (rc)
256		return rc;
257
258	for (i = count - 1; i >= 0; i--) {
259		struct cxl_endpoint_decoder *cxled = p->targets[i];
260		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
261		struct cxl_port *iter = cxled_to_port(cxled);
262		struct cxl_dev_state *cxlds = cxlmd->cxlds;
263		struct cxl_ep *ep;
264
265		if (cxlds->rcd)
266			goto endpoint_reset;
267
268		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
269			iter = to_cxl_port(iter->dev.parent);
270
271		for (ep = cxl_ep_load(iter, cxlmd); iter;
272		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
273			struct cxl_region_ref *cxl_rr;
274			struct cxl_decoder *cxld;
275
276			cxl_rr = cxl_rr_load(iter, cxlr);
277			cxld = cxl_rr->decoder;
278			if (cxld->reset)
279				rc = cxld->reset(cxld);
280			if (rc)
281				return rc;
282			set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
283		}
284
285endpoint_reset:
286		rc = cxled->cxld.reset(&cxled->cxld);
287		if (rc)
288			return rc;
289		set_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
290	}
291
292	/* all decoders associated with this region have been torn down */
293	clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
294
295	return 0;
296}
297
298static int commit_decoder(struct cxl_decoder *cxld)
299{
300	struct cxl_switch_decoder *cxlsd = NULL;
301
302	if (cxld->commit)
303		return cxld->commit(cxld);
304
305	if (is_switch_decoder(&cxld->dev))
306		cxlsd = to_cxl_switch_decoder(&cxld->dev);
307
308	if (dev_WARN_ONCE(&cxld->dev, !cxlsd || cxlsd->nr_targets > 1,
309			  "->commit() is required\n"))
310		return -ENXIO;
311	return 0;
312}
313
314static int cxl_region_decode_commit(struct cxl_region *cxlr)
315{
316	struct cxl_region_params *p = &cxlr->params;
317	int i, rc = 0;
318
319	for (i = 0; i < p->nr_targets; i++) {
320		struct cxl_endpoint_decoder *cxled = p->targets[i];
321		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
322		struct cxl_region_ref *cxl_rr;
323		struct cxl_decoder *cxld;
324		struct cxl_port *iter;
325		struct cxl_ep *ep;
326
327		/* commit bottom up */
328		for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
329		     iter = to_cxl_port(iter->dev.parent)) {
330			cxl_rr = cxl_rr_load(iter, cxlr);
331			cxld = cxl_rr->decoder;
332			rc = commit_decoder(cxld);
333			if (rc)
334				break;
335		}
336
337		if (rc) {
338			/* programming @iter failed, teardown */
339			for (ep = cxl_ep_load(iter, cxlmd); ep && iter;
340			     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
341				cxl_rr = cxl_rr_load(iter, cxlr);
342				cxld = cxl_rr->decoder;
343				if (cxld->reset)
344					cxld->reset(cxld);
345			}
346
347			cxled->cxld.reset(&cxled->cxld);
348			goto err;
349		}
350	}
351
352	return 0;
353
354err:
355	/* undo the targets that were successfully committed */
356	cxl_region_decode_reset(cxlr, i);
357	return rc;
358}
359
360static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
361			    const char *buf, size_t len)
362{
363	struct cxl_region *cxlr = to_cxl_region(dev);
364	struct cxl_region_params *p = &cxlr->params;
365	bool commit;
366	ssize_t rc;
367
368	rc = kstrtobool(buf, &commit);
369	if (rc)
370		return rc;
371
372	rc = down_write_killable(&cxl_region_rwsem);
373	if (rc)
374		return rc;
375
376	/* Already in the requested state? */
377	if (commit && p->state >= CXL_CONFIG_COMMIT)
378		goto out;
379	if (!commit && p->state < CXL_CONFIG_COMMIT)
380		goto out;
381
382	/* Not ready to commit? */
383	if (commit && p->state < CXL_CONFIG_ACTIVE) {
384		rc = -ENXIO;
385		goto out;
386	}
387
388	/*
389	 * Invalidate caches before region setup to drop any speculative
390	 * consumption of this address space
391	 */
392	rc = cxl_region_invalidate_memregion(cxlr);
393	if (rc)
394		goto out;
395
396	if (commit) {
397		rc = cxl_region_decode_commit(cxlr);
398		if (rc == 0)
399			p->state = CXL_CONFIG_COMMIT;
400	} else {
401		p->state = CXL_CONFIG_RESET_PENDING;
402		up_write(&cxl_region_rwsem);
403		device_release_driver(&cxlr->dev);
404		down_write(&cxl_region_rwsem);
405
406		/*
407		 * The lock was dropped, so need to revalidate that the reset is
408		 * still pending.
409		 */
410		if (p->state == CXL_CONFIG_RESET_PENDING) {
411			rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
412			/*
413			 * Revert to committed since there may still be active
414			 * decoders associated with this region, or move forward
415			 * to active to mark the reset successful
416			 */
417			if (rc)
418				p->state = CXL_CONFIG_COMMIT;
419			else
420				p->state = CXL_CONFIG_ACTIVE;
421		}
422	}
423
424out:
425	up_write(&cxl_region_rwsem);
426
427	if (rc)
428		return rc;
429	return len;
430}
431
432static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
433			   char *buf)
434{
435	struct cxl_region *cxlr = to_cxl_region(dev);
436	struct cxl_region_params *p = &cxlr->params;
437	ssize_t rc;
438
439	rc = down_read_interruptible(&cxl_region_rwsem);
440	if (rc)
441		return rc;
442	rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
443	up_read(&cxl_region_rwsem);
444
445	return rc;
446}
447static DEVICE_ATTR_RW(commit);
448
449static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
450				  int n)
451{
452	struct device *dev = kobj_to_dev(kobj);
453	struct cxl_region *cxlr = to_cxl_region(dev);
454
455	/*
456	 * Support tooling that expects to find a 'uuid' attribute for all
457	 * regions regardless of mode.
458	 */
459	if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
460		return 0444;
461	return a->mode;
462}
463
464static ssize_t interleave_ways_show(struct device *dev,
465				    struct device_attribute *attr, char *buf)
466{
467	struct cxl_region *cxlr = to_cxl_region(dev);
468	struct cxl_region_params *p = &cxlr->params;
469	ssize_t rc;
470
471	rc = down_read_interruptible(&cxl_region_rwsem);
472	if (rc)
473		return rc;
474	rc = sysfs_emit(buf, "%d\n", p->interleave_ways);
475	up_read(&cxl_region_rwsem);
476
477	return rc;
478}
479
480static const struct attribute_group *get_cxl_region_target_group(void);
481
482static ssize_t interleave_ways_store(struct device *dev,
483				     struct device_attribute *attr,
484				     const char *buf, size_t len)
485{
486	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
487	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
488	struct cxl_region *cxlr = to_cxl_region(dev);
489	struct cxl_region_params *p = &cxlr->params;
490	unsigned int val, save;
491	int rc;
492	u8 iw;
493
494	rc = kstrtouint(buf, 0, &val);
495	if (rc)
496		return rc;
497
498	rc = ways_to_eiw(val, &iw);
499	if (rc)
500		return rc;
501
502	/*
503	 * Even for x3, x6, and x12 interleaves the region interleave must be a
504	 * power of 2 multiple of the host bridge interleave.
505	 */
506	if (!is_power_of_2(val / cxld->interleave_ways) ||
507	    (val % cxld->interleave_ways)) {
508		dev_dbg(&cxlr->dev, "invalid interleave: %d\n", val);
509		return -EINVAL;
510	}
511
512	rc = down_write_killable(&cxl_region_rwsem);
513	if (rc)
514		return rc;
515	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
516		rc = -EBUSY;
517		goto out;
518	}
519
520	save = p->interleave_ways;
521	p->interleave_ways = val;
522	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
523	if (rc)
524		p->interleave_ways = save;
525out:
526	up_write(&cxl_region_rwsem);
527	if (rc)
528		return rc;
529	return len;
530}
531static DEVICE_ATTR_RW(interleave_ways);
532
533static ssize_t interleave_granularity_show(struct device *dev,
534					   struct device_attribute *attr,
535					   char *buf)
536{
537	struct cxl_region *cxlr = to_cxl_region(dev);
538	struct cxl_region_params *p = &cxlr->params;
539	ssize_t rc;
540
541	rc = down_read_interruptible(&cxl_region_rwsem);
542	if (rc)
543		return rc;
544	rc = sysfs_emit(buf, "%d\n", p->interleave_granularity);
545	up_read(&cxl_region_rwsem);
546
547	return rc;
548}
549
550static ssize_t interleave_granularity_store(struct device *dev,
551					    struct device_attribute *attr,
552					    const char *buf, size_t len)
553{
554	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
555	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
556	struct cxl_region *cxlr = to_cxl_region(dev);
557	struct cxl_region_params *p = &cxlr->params;
558	int rc, val;
559	u16 ig;
560
561	rc = kstrtoint(buf, 0, &val);
562	if (rc)
563		return rc;
564
565	rc = granularity_to_eig(val, &ig);
566	if (rc)
567		return rc;
568
569	/*
570	 * When the host-bridge is interleaved, disallow region granularity !=
571	 * root granularity. Regions with a granularity less than the root
572	 * interleave result in needing multiple endpoints to support a single
573	 * slot in the interleave (possible to support in the future). Regions
574	 * with a granularity greater than the root interleave result in invalid
575	 * DPA translations (invalid to support).
576	 */
577	if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
578		return -EINVAL;
579
580	rc = down_write_killable(&cxl_region_rwsem);
581	if (rc)
582		return rc;
583	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
584		rc = -EBUSY;
585		goto out;
586	}
587
588	p->interleave_granularity = val;
589out:
590	up_write(&cxl_region_rwsem);
591	if (rc)
592		return rc;
593	return len;
594}
595static DEVICE_ATTR_RW(interleave_granularity);
596
597static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
598			     char *buf)
599{
600	struct cxl_region *cxlr = to_cxl_region(dev);
601	struct cxl_region_params *p = &cxlr->params;
602	u64 resource = -1ULL;
603	ssize_t rc;
604
605	rc = down_read_interruptible(&cxl_region_rwsem);
606	if (rc)
607		return rc;
608	if (p->res)
609		resource = p->res->start;
610	rc = sysfs_emit(buf, "%#llx\n", resource);
611	up_read(&cxl_region_rwsem);
612
613	return rc;
614}
615static DEVICE_ATTR_RO(resource);
616
617static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
618			 char *buf)
619{
620	struct cxl_region *cxlr = to_cxl_region(dev);
621
622	return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode));
623}
624static DEVICE_ATTR_RO(mode);
625
626static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
627{
628	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
629	struct cxl_region_params *p = &cxlr->params;
630	struct resource *res;
631	u64 remainder = 0;
632
633	lockdep_assert_held_write(&cxl_region_rwsem);
634
635	/* Nothing to do... */
636	if (p->res && resource_size(p->res) == size)
637		return 0;
638
639	/* To change size the old size must be freed first */
640	if (p->res)
641		return -EBUSY;
642
643	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
644		return -EBUSY;
645
646	/* ways, granularity and uuid (if PMEM) need to be set before HPA */
647	if (!p->interleave_ways || !p->interleave_granularity ||
648	    (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
649		return -ENXIO;
650
651	div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder);
652	if (remainder)
653		return -EINVAL;
654
655	res = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
656				    dev_name(&cxlr->dev));
657	if (IS_ERR(res)) {
658		dev_dbg(&cxlr->dev,
659			"HPA allocation error (%ld) for size:%pap in %s %pr\n",
660			PTR_ERR(res), &size, cxlrd->res->name, cxlrd->res);
661		return PTR_ERR(res);
662	}
663
664	p->res = res;
665	p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
666
667	return 0;
668}
669
670static void cxl_region_iomem_release(struct cxl_region *cxlr)
671{
672	struct cxl_region_params *p = &cxlr->params;
673
674	if (device_is_registered(&cxlr->dev))
675		lockdep_assert_held_write(&cxl_region_rwsem);
676	if (p->res) {
677		/*
678		 * Autodiscovered regions may not have been able to insert their
679		 * resource.
680		 */
681		if (p->res->parent)
682			remove_resource(p->res);
683		kfree(p->res);
684		p->res = NULL;
685	}
686}
687
688static int free_hpa(struct cxl_region *cxlr)
689{
690	struct cxl_region_params *p = &cxlr->params;
691
692	lockdep_assert_held_write(&cxl_region_rwsem);
693
694	if (!p->res)
695		return 0;
696
697	if (p->state >= CXL_CONFIG_ACTIVE)
698		return -EBUSY;
699
700	cxl_region_iomem_release(cxlr);
701	p->state = CXL_CONFIG_IDLE;
702	return 0;
703}
704
705static ssize_t size_store(struct device *dev, struct device_attribute *attr,
706			  const char *buf, size_t len)
707{
708	struct cxl_region *cxlr = to_cxl_region(dev);
709	u64 val;
710	int rc;
711
712	rc = kstrtou64(buf, 0, &val);
713	if (rc)
714		return rc;
715
716	rc = down_write_killable(&cxl_region_rwsem);
717	if (rc)
718		return rc;
719
720	if (val)
721		rc = alloc_hpa(cxlr, val);
722	else
723		rc = free_hpa(cxlr);
724	up_write(&cxl_region_rwsem);
725
726	if (rc)
727		return rc;
728
729	return len;
730}
731
732static ssize_t size_show(struct device *dev, struct device_attribute *attr,
733			 char *buf)
734{
735	struct cxl_region *cxlr = to_cxl_region(dev);
736	struct cxl_region_params *p = &cxlr->params;
737	u64 size = 0;
738	ssize_t rc;
739
740	rc = down_read_interruptible(&cxl_region_rwsem);
741	if (rc)
742		return rc;
743	if (p->res)
744		size = resource_size(p->res);
745	rc = sysfs_emit(buf, "%#llx\n", size);
746	up_read(&cxl_region_rwsem);
747
748	return rc;
749}
750static DEVICE_ATTR_RW(size);
751
752static struct attribute *cxl_region_attrs[] = {
753	&dev_attr_uuid.attr,
754	&dev_attr_commit.attr,
755	&dev_attr_interleave_ways.attr,
756	&dev_attr_interleave_granularity.attr,
757	&dev_attr_resource.attr,
758	&dev_attr_size.attr,
759	&dev_attr_mode.attr,
760	NULL,
761};
762
763static const struct attribute_group cxl_region_group = {
764	.attrs = cxl_region_attrs,
765	.is_visible = cxl_region_visible,
766};
767
768static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
769{
770	struct cxl_region_params *p = &cxlr->params;
771	struct cxl_endpoint_decoder *cxled;
772	int rc;
773
774	rc = down_read_interruptible(&cxl_region_rwsem);
775	if (rc)
776		return rc;
777
778	if (pos >= p->interleave_ways) {
779		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
780			p->interleave_ways);
781		rc = -ENXIO;
782		goto out;
783	}
784
785	cxled = p->targets[pos];
786	if (!cxled)
787		rc = sysfs_emit(buf, "\n");
788	else
789		rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
790out:
791	up_read(&cxl_region_rwsem);
792
793	return rc;
794}
795
796static int match_free_decoder(struct device *dev, void *data)
797{
798	struct cxl_decoder *cxld;
799	int *id = data;
800
801	if (!is_switch_decoder(dev))
802		return 0;
803
804	cxld = to_cxl_decoder(dev);
805
806	/* enforce ordered allocation */
807	if (cxld->id != *id)
808		return 0;
809
810	if (!cxld->region)
811		return 1;
812
813	(*id)++;
814
815	return 0;
816}
817
818static int match_auto_decoder(struct device *dev, void *data)
819{
820	struct cxl_region_params *p = data;
821	struct cxl_decoder *cxld;
822	struct range *r;
823
824	if (!is_switch_decoder(dev))
825		return 0;
826
827	cxld = to_cxl_decoder(dev);
828	r = &cxld->hpa_range;
829
830	if (p->res && p->res->start == r->start && p->res->end == r->end)
831		return 1;
832
833	return 0;
834}
835
836static struct cxl_decoder *
837cxl_region_find_decoder(struct cxl_port *port,
838			struct cxl_endpoint_decoder *cxled,
839			struct cxl_region *cxlr)
840{
841	struct device *dev;
842	int id = 0;
843
844	if (port == cxled_to_port(cxled))
845		return &cxled->cxld;
846
847	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
848		dev = device_find_child(&port->dev, &cxlr->params,
849					match_auto_decoder);
850	else
851		dev = device_find_child(&port->dev, &id, match_free_decoder);
852	if (!dev)
853		return NULL;
854	/*
855	 * This decoder is pinned registered as long as the endpoint decoder is
856	 * registered, and endpoint decoder unregistration holds the
857	 * cxl_region_rwsem over unregister events, so no need to hold on to
858	 * this extra reference.
859	 */
860	put_device(dev);
861	return to_cxl_decoder(dev);
862}
863
864static bool auto_order_ok(struct cxl_port *port, struct cxl_region *cxlr_iter,
865			  struct cxl_decoder *cxld)
866{
867	struct cxl_region_ref *rr = cxl_rr_load(port, cxlr_iter);
868	struct cxl_decoder *cxld_iter = rr->decoder;
869
870	/*
871	 * Allow the out of order assembly of auto-discovered regions.
872	 * Per CXL Spec 3.1 8.2.4.20.12 software must commit decoders
873	 * in HPA order. Confirm that the decoder with the lesser HPA
874	 * starting address has the lesser id.
875	 */
876	dev_dbg(&cxld->dev, "check for HPA violation %s:%d < %s:%d\n",
877		dev_name(&cxld->dev), cxld->id,
878		dev_name(&cxld_iter->dev), cxld_iter->id);
879
880	if (cxld_iter->id > cxld->id)
881		return true;
882
883	return false;
884}
885
886static struct cxl_region_ref *
887alloc_region_ref(struct cxl_port *port, struct cxl_region *cxlr,
888		 struct cxl_endpoint_decoder *cxled)
889{
890	struct cxl_region_params *p = &cxlr->params;
891	struct cxl_region_ref *cxl_rr, *iter;
892	unsigned long index;
893	int rc;
894
895	xa_for_each(&port->regions, index, iter) {
896		struct cxl_region_params *ip = &iter->region->params;
897
898		if (!ip->res || ip->res->start < p->res->start)
899			continue;
900
901		if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
902			struct cxl_decoder *cxld;
903
904			cxld = cxl_region_find_decoder(port, cxled, cxlr);
905			if (auto_order_ok(port, iter->region, cxld))
906				continue;
907		}
908		dev_dbg(&cxlr->dev, "%s: HPA order violation %s:%pr vs %pr\n",
909			dev_name(&port->dev),
910			dev_name(&iter->region->dev), ip->res, p->res);
911
912		return ERR_PTR(-EBUSY);
913	}
914
915	cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
916	if (!cxl_rr)
917		return ERR_PTR(-ENOMEM);
918	cxl_rr->port = port;
919	cxl_rr->region = cxlr;
920	cxl_rr->nr_targets = 1;
921	xa_init(&cxl_rr->endpoints);
922
923	rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL);
924	if (rc) {
925		dev_dbg(&cxlr->dev,
926			"%s: failed to track region reference: %d\n",
927			dev_name(&port->dev), rc);
928		kfree(cxl_rr);
929		return ERR_PTR(rc);
930	}
931
932	return cxl_rr;
933}
934
935static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr)
936{
937	struct cxl_region *cxlr = cxl_rr->region;
938	struct cxl_decoder *cxld = cxl_rr->decoder;
939
940	if (!cxld)
941		return;
942
943	dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n");
944	if (cxld->region == cxlr) {
945		cxld->region = NULL;
946		put_device(&cxlr->dev);
947	}
948}
949
950static void free_region_ref(struct cxl_region_ref *cxl_rr)
951{
952	struct cxl_port *port = cxl_rr->port;
953	struct cxl_region *cxlr = cxl_rr->region;
954
955	cxl_rr_free_decoder(cxl_rr);
956	xa_erase(&port->regions, (unsigned long)cxlr);
957	xa_destroy(&cxl_rr->endpoints);
958	kfree(cxl_rr);
959}
960
961static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
962			 struct cxl_endpoint_decoder *cxled)
963{
964	int rc;
965	struct cxl_port *port = cxl_rr->port;
966	struct cxl_region *cxlr = cxl_rr->region;
967	struct cxl_decoder *cxld = cxl_rr->decoder;
968	struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled));
969
970	if (ep) {
971		rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep,
972			       GFP_KERNEL);
973		if (rc)
974			return rc;
975	}
976	cxl_rr->nr_eps++;
977
978	if (!cxld->region) {
979		cxld->region = cxlr;
980		get_device(&cxlr->dev);
981	}
982
983	return 0;
984}
985
986static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
987				struct cxl_endpoint_decoder *cxled,
988				struct cxl_region_ref *cxl_rr)
989{
990	struct cxl_decoder *cxld;
991
992	cxld = cxl_region_find_decoder(port, cxled, cxlr);
993	if (!cxld) {
994		dev_dbg(&cxlr->dev, "%s: no decoder available\n",
995			dev_name(&port->dev));
996		return -EBUSY;
997	}
998
999	if (cxld->region) {
1000		dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
1001			dev_name(&port->dev), dev_name(&cxld->dev),
1002			dev_name(&cxld->region->dev));
1003		return -EBUSY;
1004	}
1005
1006	/*
1007	 * Endpoints should already match the region type, but backstop that
1008	 * assumption with an assertion. Switch-decoders change mapping-type
1009	 * based on what is mapped when they are assigned to a region.
1010	 */
1011	dev_WARN_ONCE(&cxlr->dev,
1012		      port == cxled_to_port(cxled) &&
1013			      cxld->target_type != cxlr->type,
1014		      "%s:%s mismatch decoder type %d -> %d\n",
1015		      dev_name(&cxled_to_memdev(cxled)->dev),
1016		      dev_name(&cxld->dev), cxld->target_type, cxlr->type);
1017	cxld->target_type = cxlr->type;
1018	cxl_rr->decoder = cxld;
1019	return 0;
1020}
1021
1022/**
1023 * cxl_port_attach_region() - track a region's interest in a port by endpoint
1024 * @port: port to add a new region reference 'struct cxl_region_ref'
1025 * @cxlr: region to attach to @port
1026 * @cxled: endpoint decoder used to create or further pin a region reference
1027 * @pos: interleave position of @cxled in @cxlr
1028 *
1029 * The attach event is an opportunity to validate CXL decode setup
1030 * constraints and record metadata needed for programming HDM decoders,
1031 * in particular decoder target lists.
1032 *
1033 * The steps are:
1034 *
1035 * - validate that there are no other regions with a higher HPA already
1036 *   associated with @port
1037 * - establish a region reference if one is not already present
1038 *
1039 *   - additionally allocate a decoder instance that will host @cxlr on
1040 *     @port
1041 *
1042 * - pin the region reference by the endpoint
1043 * - account for how many entries in @port's target list are needed to
1044 *   cover all of the added endpoints.
1045 */
1046static int cxl_port_attach_region(struct cxl_port *port,
1047				  struct cxl_region *cxlr,
1048				  struct cxl_endpoint_decoder *cxled, int pos)
1049{
1050	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1051	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
1052	struct cxl_region_ref *cxl_rr;
1053	bool nr_targets_inc = false;
1054	struct cxl_decoder *cxld;
1055	unsigned long index;
1056	int rc = -EBUSY;
1057
1058	lockdep_assert_held_write(&cxl_region_rwsem);
1059
1060	cxl_rr = cxl_rr_load(port, cxlr);
1061	if (cxl_rr) {
1062		struct cxl_ep *ep_iter;
1063		int found = 0;
1064
1065		/*
1066		 * Walk the existing endpoints that have been attached to
1067		 * @cxlr at @port and see if they share the same 'next' port
1068		 * in the downstream direction. I.e. endpoints that share common
1069		 * upstream switch.
1070		 */
1071		xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
1072			if (ep_iter == ep)
1073				continue;
1074			if (ep_iter->next == ep->next) {
1075				found++;
1076				break;
1077			}
1078		}
1079
1080		/*
1081		 * New target port, or @port is an endpoint port that always
1082		 * accounts its own local decode as a target.
1083		 */
1084		if (!found || !ep->next) {
1085			cxl_rr->nr_targets++;
1086			nr_targets_inc = true;
1087		}
1088	} else {
1089		cxl_rr = alloc_region_ref(port, cxlr, cxled);
1090		if (IS_ERR(cxl_rr)) {
1091			dev_dbg(&cxlr->dev,
1092				"%s: failed to allocate region reference\n",
1093				dev_name(&port->dev));
1094			return PTR_ERR(cxl_rr);
1095		}
1096		nr_targets_inc = true;
1097
1098		rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr);
1099		if (rc)
1100			goto out_erase;
1101	}
1102	cxld = cxl_rr->decoder;
1103
1104	rc = cxl_rr_ep_add(cxl_rr, cxled);
1105	if (rc) {
1106		dev_dbg(&cxlr->dev,
1107			"%s: failed to track endpoint %s:%s reference\n",
1108			dev_name(&port->dev), dev_name(&cxlmd->dev),
1109			dev_name(&cxld->dev));
1110		goto out_erase;
1111	}
1112
1113	dev_dbg(&cxlr->dev,
1114		"%s:%s %s add: %s:%s @ %d next: %s nr_eps: %d nr_targets: %d\n",
1115		dev_name(port->uport_dev), dev_name(&port->dev),
1116		dev_name(&cxld->dev), dev_name(&cxlmd->dev),
1117		dev_name(&cxled->cxld.dev), pos,
1118		ep ? ep->next ? dev_name(ep->next->uport_dev) :
1119				      dev_name(&cxlmd->dev) :
1120			   "none",
1121		cxl_rr->nr_eps, cxl_rr->nr_targets);
1122
1123	return 0;
1124out_erase:
1125	if (nr_targets_inc)
1126		cxl_rr->nr_targets--;
1127	if (cxl_rr->nr_eps == 0)
1128		free_region_ref(cxl_rr);
1129	return rc;
1130}
1131
1132static void cxl_port_detach_region(struct cxl_port *port,
1133				   struct cxl_region *cxlr,
1134				   struct cxl_endpoint_decoder *cxled)
1135{
1136	struct cxl_region_ref *cxl_rr;
1137	struct cxl_ep *ep = NULL;
1138
1139	lockdep_assert_held_write(&cxl_region_rwsem);
1140
1141	cxl_rr = cxl_rr_load(port, cxlr);
1142	if (!cxl_rr)
1143		return;
1144
1145	/*
1146	 * Endpoint ports do not carry cxl_ep references, and they
1147	 * never target more than one endpoint by definition
1148	 */
1149	if (cxl_rr->decoder == &cxled->cxld)
1150		cxl_rr->nr_eps--;
1151	else
1152		ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled);
1153	if (ep) {
1154		struct cxl_ep *ep_iter;
1155		unsigned long index;
1156		int found = 0;
1157
1158		cxl_rr->nr_eps--;
1159		xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
1160			if (ep_iter->next == ep->next) {
1161				found++;
1162				break;
1163			}
1164		}
1165		if (!found)
1166			cxl_rr->nr_targets--;
1167	}
1168
1169	if (cxl_rr->nr_eps == 0)
1170		free_region_ref(cxl_rr);
1171}
1172
1173static int check_last_peer(struct cxl_endpoint_decoder *cxled,
1174			   struct cxl_ep *ep, struct cxl_region_ref *cxl_rr,
1175			   int distance)
1176{
1177	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1178	struct cxl_region *cxlr = cxl_rr->region;
1179	struct cxl_region_params *p = &cxlr->params;
1180	struct cxl_endpoint_decoder *cxled_peer;
1181	struct cxl_port *port = cxl_rr->port;
1182	struct cxl_memdev *cxlmd_peer;
1183	struct cxl_ep *ep_peer;
1184	int pos = cxled->pos;
1185
1186	/*
1187	 * If this position wants to share a dport with the last endpoint mapped
1188	 * then that endpoint, at index 'position - distance', must also be
1189	 * mapped by this dport.
1190	 */
1191	if (pos < distance) {
1192		dev_dbg(&cxlr->dev, "%s:%s: cannot host %s:%s at %d\n",
1193			dev_name(port->uport_dev), dev_name(&port->dev),
1194			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1195		return -ENXIO;
1196	}
1197	cxled_peer = p->targets[pos - distance];
1198	cxlmd_peer = cxled_to_memdev(cxled_peer);
1199	ep_peer = cxl_ep_load(port, cxlmd_peer);
1200	if (ep->dport != ep_peer->dport) {
1201		dev_dbg(&cxlr->dev,
1202			"%s:%s: %s:%s pos %d mismatched peer %s:%s\n",
1203			dev_name(port->uport_dev), dev_name(&port->dev),
1204			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos,
1205			dev_name(&cxlmd_peer->dev),
1206			dev_name(&cxled_peer->cxld.dev));
1207		return -ENXIO;
1208	}
1209
1210	return 0;
1211}
1212
1213static int cxl_port_setup_targets(struct cxl_port *port,
1214				  struct cxl_region *cxlr,
1215				  struct cxl_endpoint_decoder *cxled)
1216{
1217	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
1218	int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos;
1219	struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
1220	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1221	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1222	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
1223	struct cxl_region_params *p = &cxlr->params;
1224	struct cxl_decoder *cxld = cxl_rr->decoder;
1225	struct cxl_switch_decoder *cxlsd;
1226	u16 eig, peig;
1227	u8 eiw, peiw;
1228
1229	/*
1230	 * While root level decoders support x3, x6, x12, switch level
1231	 * decoders only support powers of 2 up to x16.
1232	 */
1233	if (!is_power_of_2(cxl_rr->nr_targets)) {
1234		dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
1235			dev_name(port->uport_dev), dev_name(&port->dev),
1236			cxl_rr->nr_targets);
1237		return -EINVAL;
1238	}
1239
1240	cxlsd = to_cxl_switch_decoder(&cxld->dev);
1241	if (cxl_rr->nr_targets_set) {
1242		int i, distance;
1243
1244		/*
1245		 * Passthrough decoders impose no distance requirements between
1246		 * peers
1247		 */
1248		if (cxl_rr->nr_targets == 1)
1249			distance = 0;
1250		else
1251			distance = p->nr_targets / cxl_rr->nr_targets;
1252		for (i = 0; i < cxl_rr->nr_targets_set; i++)
1253			if (ep->dport == cxlsd->target[i]) {
1254				rc = check_last_peer(cxled, ep, cxl_rr,
1255						     distance);
1256				if (rc)
1257					return rc;
1258				goto out_target_set;
1259			}
1260		goto add_target;
1261	}
1262
1263	if (is_cxl_root(parent_port)) {
1264		/*
1265		 * Root decoder IG is always set to value in CFMWS which
1266		 * may be different than this region's IG.  We can use the
1267		 * region's IG here since interleave_granularity_store()
1268		 * does not allow interleaved host-bridges with
1269		 * root IG != region IG.
1270		 */
1271		parent_ig = p->interleave_granularity;
1272		parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
1273		/*
1274		 * For purposes of address bit routing, use power-of-2 math for
1275		 * switch ports.
1276		 */
1277		if (!is_power_of_2(parent_iw))
1278			parent_iw /= 3;
1279	} else {
1280		struct cxl_region_ref *parent_rr;
1281		struct cxl_decoder *parent_cxld;
1282
1283		parent_rr = cxl_rr_load(parent_port, cxlr);
1284		parent_cxld = parent_rr->decoder;
1285		parent_ig = parent_cxld->interleave_granularity;
1286		parent_iw = parent_cxld->interleave_ways;
1287	}
1288
1289	rc = granularity_to_eig(parent_ig, &peig);
1290	if (rc) {
1291		dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
1292			dev_name(parent_port->uport_dev),
1293			dev_name(&parent_port->dev), parent_ig);
1294		return rc;
1295	}
1296
1297	rc = ways_to_eiw(parent_iw, &peiw);
1298	if (rc) {
1299		dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
1300			dev_name(parent_port->uport_dev),
1301			dev_name(&parent_port->dev), parent_iw);
1302		return rc;
1303	}
1304
1305	iw = cxl_rr->nr_targets;
1306	rc = ways_to_eiw(iw, &eiw);
1307	if (rc) {
1308		dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
1309			dev_name(port->uport_dev), dev_name(&port->dev), iw);
1310		return rc;
1311	}
1312
1313	/*
1314	 * Interleave granularity is a multiple of @parent_port granularity.
1315	 * Multiplier is the parent port interleave ways.
1316	 */
1317	rc = granularity_to_eig(parent_ig * parent_iw, &eig);
1318	if (rc) {
1319		dev_dbg(&cxlr->dev,
1320			"%s: invalid granularity calculation (%d * %d)\n",
1321			dev_name(&parent_port->dev), parent_ig, parent_iw);
1322		return rc;
1323	}
1324
1325	rc = eig_to_granularity(eig, &ig);
1326	if (rc) {
1327		dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
1328			dev_name(port->uport_dev), dev_name(&port->dev),
1329			256 << eig);
1330		return rc;
1331	}
1332
1333	if (iw > 8 || iw > cxlsd->nr_targets) {
1334		dev_dbg(&cxlr->dev,
1335			"%s:%s:%s: ways: %d overflows targets: %d\n",
1336			dev_name(port->uport_dev), dev_name(&port->dev),
1337			dev_name(&cxld->dev), iw, cxlsd->nr_targets);
1338		return -ENXIO;
1339	}
1340
1341	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1342		if (cxld->interleave_ways != iw ||
1343		    cxld->interleave_granularity != ig ||
1344		    cxld->hpa_range.start != p->res->start ||
1345		    cxld->hpa_range.end != p->res->end ||
1346		    ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
1347			dev_err(&cxlr->dev,
1348				"%s:%s %s expected iw: %d ig: %d %pr\n",
1349				dev_name(port->uport_dev), dev_name(&port->dev),
1350				__func__, iw, ig, p->res);
1351			dev_err(&cxlr->dev,
1352				"%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n",
1353				dev_name(port->uport_dev), dev_name(&port->dev),
1354				__func__, cxld->interleave_ways,
1355				cxld->interleave_granularity,
1356				(cxld->flags & CXL_DECODER_F_ENABLE) ?
1357					"enabled" :
1358					"disabled",
1359				cxld->hpa_range.start, cxld->hpa_range.end);
1360			return -ENXIO;
1361		}
1362	} else {
1363		cxld->interleave_ways = iw;
1364		cxld->interleave_granularity = ig;
1365		cxld->hpa_range = (struct range) {
1366			.start = p->res->start,
1367			.end = p->res->end,
1368		};
1369	}
1370	dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport_dev),
1371		dev_name(&port->dev), iw, ig);
1372add_target:
1373	if (cxl_rr->nr_targets_set == cxl_rr->nr_targets) {
1374		dev_dbg(&cxlr->dev,
1375			"%s:%s: targets full trying to add %s:%s at %d\n",
1376			dev_name(port->uport_dev), dev_name(&port->dev),
1377			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1378		return -ENXIO;
1379	}
1380	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1381		if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) {
1382			dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n",
1383				dev_name(port->uport_dev), dev_name(&port->dev),
1384				dev_name(&cxlsd->cxld.dev),
1385				dev_name(ep->dport->dport_dev),
1386				cxl_rr->nr_targets_set);
1387			return -ENXIO;
1388		}
1389	} else
1390		cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
1391	inc = 1;
1392out_target_set:
1393	cxl_rr->nr_targets_set += inc;
1394	dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n",
1395		dev_name(port->uport_dev), dev_name(&port->dev),
1396		cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport_dev),
1397		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1398
1399	return 0;
1400}
1401
1402static void cxl_port_reset_targets(struct cxl_port *port,
1403				   struct cxl_region *cxlr)
1404{
1405	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1406	struct cxl_decoder *cxld;
1407
1408	/*
1409	 * After the last endpoint has been detached the entire cxl_rr may now
1410	 * be gone.
1411	 */
1412	if (!cxl_rr)
1413		return;
1414	cxl_rr->nr_targets_set = 0;
1415
1416	cxld = cxl_rr->decoder;
1417	cxld->hpa_range = (struct range) {
1418		.start = 0,
1419		.end = -1,
1420	};
1421}
1422
1423static void cxl_region_teardown_targets(struct cxl_region *cxlr)
1424{
1425	struct cxl_region_params *p = &cxlr->params;
1426	struct cxl_endpoint_decoder *cxled;
1427	struct cxl_dev_state *cxlds;
1428	struct cxl_memdev *cxlmd;
1429	struct cxl_port *iter;
1430	struct cxl_ep *ep;
1431	int i;
1432
1433	/*
1434	 * In the auto-discovery case skip automatic teardown since the
1435	 * address space is already active
1436	 */
1437	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
1438		return;
1439
1440	for (i = 0; i < p->nr_targets; i++) {
1441		cxled = p->targets[i];
1442		cxlmd = cxled_to_memdev(cxled);
1443		cxlds = cxlmd->cxlds;
1444
1445		if (cxlds->rcd)
1446			continue;
1447
1448		iter = cxled_to_port(cxled);
1449		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1450			iter = to_cxl_port(iter->dev.parent);
1451
1452		for (ep = cxl_ep_load(iter, cxlmd); iter;
1453		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd))
1454			cxl_port_reset_targets(iter, cxlr);
1455	}
1456}
1457
1458static int cxl_region_setup_targets(struct cxl_region *cxlr)
1459{
1460	struct cxl_region_params *p = &cxlr->params;
1461	struct cxl_endpoint_decoder *cxled;
1462	struct cxl_dev_state *cxlds;
1463	int i, rc, rch = 0, vh = 0;
1464	struct cxl_memdev *cxlmd;
1465	struct cxl_port *iter;
1466	struct cxl_ep *ep;
1467
1468	for (i = 0; i < p->nr_targets; i++) {
1469		cxled = p->targets[i];
1470		cxlmd = cxled_to_memdev(cxled);
1471		cxlds = cxlmd->cxlds;
1472
1473		/* validate that all targets agree on topology */
1474		if (!cxlds->rcd) {
1475			vh++;
1476		} else {
1477			rch++;
1478			continue;
1479		}
1480
1481		iter = cxled_to_port(cxled);
1482		while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1483			iter = to_cxl_port(iter->dev.parent);
1484
1485		/*
1486		 * Descend the topology tree programming / validating
1487		 * targets while looking for conflicts.
1488		 */
1489		for (ep = cxl_ep_load(iter, cxlmd); iter;
1490		     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
1491			rc = cxl_port_setup_targets(iter, cxlr, cxled);
1492			if (rc) {
1493				cxl_region_teardown_targets(cxlr);
1494				return rc;
1495			}
1496		}
1497	}
1498
1499	if (rch && vh) {
1500		dev_err(&cxlr->dev, "mismatched CXL topologies detected\n");
1501		cxl_region_teardown_targets(cxlr);
1502		return -ENXIO;
1503	}
1504
1505	return 0;
1506}
1507
1508static int cxl_region_validate_position(struct cxl_region *cxlr,
1509					struct cxl_endpoint_decoder *cxled,
1510					int pos)
1511{
1512	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1513	struct cxl_region_params *p = &cxlr->params;
1514	int i;
1515
1516	if (pos < 0 || pos >= p->interleave_ways) {
1517		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
1518			p->interleave_ways);
1519		return -ENXIO;
1520	}
1521
1522	if (p->targets[pos] == cxled)
1523		return 0;
1524
1525	if (p->targets[pos]) {
1526		struct cxl_endpoint_decoder *cxled_target = p->targets[pos];
1527		struct cxl_memdev *cxlmd_target = cxled_to_memdev(cxled_target);
1528
1529		dev_dbg(&cxlr->dev, "position %d already assigned to %s:%s\n",
1530			pos, dev_name(&cxlmd_target->dev),
1531			dev_name(&cxled_target->cxld.dev));
1532		return -EBUSY;
1533	}
1534
1535	for (i = 0; i < p->interleave_ways; i++) {
1536		struct cxl_endpoint_decoder *cxled_target;
1537		struct cxl_memdev *cxlmd_target;
1538
1539		cxled_target = p->targets[i];
1540		if (!cxled_target)
1541			continue;
1542
1543		cxlmd_target = cxled_to_memdev(cxled_target);
1544		if (cxlmd_target == cxlmd) {
1545			dev_dbg(&cxlr->dev,
1546				"%s already specified at position %d via: %s\n",
1547				dev_name(&cxlmd->dev), pos,
1548				dev_name(&cxled_target->cxld.dev));
1549			return -EBUSY;
1550		}
1551	}
1552
1553	return 0;
1554}
1555
1556static int cxl_region_attach_position(struct cxl_region *cxlr,
1557				      struct cxl_root_decoder *cxlrd,
1558				      struct cxl_endpoint_decoder *cxled,
1559				      const struct cxl_dport *dport, int pos)
1560{
1561	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1562	struct cxl_port *iter;
1563	int rc;
1564
1565	if (cxlrd->calc_hb(cxlrd, pos) != dport) {
1566		dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
1567			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1568			dev_name(&cxlrd->cxlsd.cxld.dev));
1569		return -ENXIO;
1570	}
1571
1572	for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
1573	     iter = to_cxl_port(iter->dev.parent)) {
1574		rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
1575		if (rc)
1576			goto err;
1577	}
1578
1579	return 0;
1580
1581err:
1582	for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
1583	     iter = to_cxl_port(iter->dev.parent))
1584		cxl_port_detach_region(iter, cxlr, cxled);
1585	return rc;
1586}
1587
1588static int cxl_region_attach_auto(struct cxl_region *cxlr,
1589				  struct cxl_endpoint_decoder *cxled, int pos)
1590{
1591	struct cxl_region_params *p = &cxlr->params;
1592
1593	if (cxled->state != CXL_DECODER_STATE_AUTO) {
1594		dev_err(&cxlr->dev,
1595			"%s: unable to add decoder to autodetected region\n",
1596			dev_name(&cxled->cxld.dev));
1597		return -EINVAL;
1598	}
1599
1600	if (pos >= 0) {
1601		dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n",
1602			dev_name(&cxled->cxld.dev), pos);
1603		return -EINVAL;
1604	}
1605
1606	if (p->nr_targets >= p->interleave_ways) {
1607		dev_err(&cxlr->dev, "%s: no more target slots available\n",
1608			dev_name(&cxled->cxld.dev));
1609		return -ENXIO;
1610	}
1611
1612	/*
1613	 * Temporarily record the endpoint decoder into the target array. Yes,
1614	 * this means that userspace can view devices in the wrong position
1615	 * before the region activates, and must be careful to understand when
1616	 * it might be racing region autodiscovery.
1617	 */
1618	pos = p->nr_targets;
1619	p->targets[pos] = cxled;
1620	cxled->pos = pos;
1621	p->nr_targets++;
1622
1623	return 0;
1624}
1625
1626static int cmp_interleave_pos(const void *a, const void *b)
1627{
1628	struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
1629	struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
1630
1631	return cxled_a->pos - cxled_b->pos;
1632}
1633
1634static struct cxl_port *next_port(struct cxl_port *port)
1635{
1636	if (!port->parent_dport)
1637		return NULL;
1638	return port->parent_dport->port;
1639}
1640
1641static int match_switch_decoder_by_range(struct device *dev, void *data)
1642{
1643	struct cxl_switch_decoder *cxlsd;
1644	struct range *r1, *r2 = data;
1645
1646	if (!is_switch_decoder(dev))
1647		return 0;
1648
1649	cxlsd = to_cxl_switch_decoder(dev);
1650	r1 = &cxlsd->cxld.hpa_range;
1651
1652	if (is_root_decoder(dev))
1653		return range_contains(r1, r2);
1654	return (r1->start == r2->start && r1->end == r2->end);
1655}
1656
1657static int find_pos_and_ways(struct cxl_port *port, struct range *range,
1658			     int *pos, int *ways)
1659{
1660	struct cxl_switch_decoder *cxlsd;
1661	struct cxl_port *parent;
1662	struct device *dev;
1663	int rc = -ENXIO;
1664
1665	parent = next_port(port);
1666	if (!parent)
1667		return rc;
1668
1669	dev = device_find_child(&parent->dev, range,
1670				match_switch_decoder_by_range);
1671	if (!dev) {
1672		dev_err(port->uport_dev,
1673			"failed to find decoder mapping %#llx-%#llx\n",
1674			range->start, range->end);
1675		return rc;
1676	}
1677	cxlsd = to_cxl_switch_decoder(dev);
1678	*ways = cxlsd->cxld.interleave_ways;
1679
1680	for (int i = 0; i < *ways; i++) {
1681		if (cxlsd->target[i] == port->parent_dport) {
1682			*pos = i;
1683			rc = 0;
1684			break;
1685		}
1686	}
1687	put_device(dev);
1688
1689	return rc;
1690}
1691
1692/**
1693 * cxl_calc_interleave_pos() - calculate an endpoint position in a region
1694 * @cxled: endpoint decoder member of given region
1695 *
1696 * The endpoint position is calculated by traversing the topology from
1697 * the endpoint to the root decoder and iteratively applying this
1698 * calculation:
1699 *
1700 *    position = position * parent_ways + parent_pos;
1701 *
1702 * ...where @position is inferred from switch and root decoder target lists.
1703 *
1704 * Return: position >= 0 on success
1705 *	   -ENXIO on failure
1706 */
1707static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled)
1708{
1709	struct cxl_port *iter, *port = cxled_to_port(cxled);
1710	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1711	struct range *range = &cxled->cxld.hpa_range;
1712	int parent_ways = 0, parent_pos = 0, pos = 0;
1713	int rc;
1714
1715	/*
1716	 * Example: the expected interleave order of the 4-way region shown
1717	 * below is: mem0, mem2, mem1, mem3
1718	 *
1719	 *		  root_port
1720	 *                 /      \
1721	 *      host_bridge_0    host_bridge_1
1722	 *        |    |           |    |
1723	 *       mem0 mem1        mem2 mem3
1724	 *
1725	 * In the example the calculator will iterate twice. The first iteration
1726	 * uses the mem position in the host-bridge and the ways of the host-
1727	 * bridge to generate the first, or local, position. The second
1728	 * iteration uses the host-bridge position in the root_port and the ways
1729	 * of the root_port to refine the position.
1730	 *
1731	 * A trace of the calculation per endpoint looks like this:
1732	 * mem0: pos = 0 * 2 + 0    mem2: pos = 0 * 2 + 0
1733	 *       pos = 0 * 2 + 0          pos = 0 * 2 + 1
1734	 *       pos: 0                   pos: 1
1735	 *
1736	 * mem1: pos = 0 * 2 + 1    mem3: pos = 0 * 2 + 1
1737	 *       pos = 1 * 2 + 0          pos = 1 * 2 + 1
1738	 *       pos: 2                   pos = 3
1739	 *
1740	 * Note that while this example is simple, the method applies to more
1741	 * complex topologies, including those with switches.
1742	 */
1743
1744	/* Iterate from endpoint to root_port refining the position */
1745	for (iter = port; iter; iter = next_port(iter)) {
1746		if (is_cxl_root(iter))
1747			break;
1748
1749		rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways);
1750		if (rc)
1751			return rc;
1752
1753		pos = pos * parent_ways + parent_pos;
1754	}
1755
1756	dev_dbg(&cxlmd->dev,
1757		"decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n",
1758		dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent),
1759		dev_name(&port->dev), range->start, range->end, pos);
1760
1761	return pos;
1762}
1763
1764static int cxl_region_sort_targets(struct cxl_region *cxlr)
1765{
1766	struct cxl_region_params *p = &cxlr->params;
1767	int i, rc = 0;
1768
1769	for (i = 0; i < p->nr_targets; i++) {
1770		struct cxl_endpoint_decoder *cxled = p->targets[i];
1771
1772		cxled->pos = cxl_calc_interleave_pos(cxled);
1773		/*
1774		 * Record that sorting failed, but still continue to calc
1775		 * cxled->pos so that follow-on code paths can reliably
1776		 * do p->targets[cxled->pos] to self-reference their entry.
1777		 */
1778		if (cxled->pos < 0)
1779			rc = -ENXIO;
1780	}
1781	/* Keep the cxlr target list in interleave position order */
1782	sort(p->targets, p->nr_targets, sizeof(p->targets[0]),
1783	     cmp_interleave_pos, NULL);
1784
1785	dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
1786	return rc;
1787}
1788
1789static int cxl_region_attach(struct cxl_region *cxlr,
1790			     struct cxl_endpoint_decoder *cxled, int pos)
1791{
1792	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
1793	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1794	struct cxl_region_params *p = &cxlr->params;
1795	struct cxl_port *ep_port, *root_port;
1796	struct cxl_dport *dport;
1797	int rc = -ENXIO;
1798
1799	if (cxled->mode != cxlr->mode) {
1800		dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
1801			dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
1802		return -EINVAL;
1803	}
1804
1805	if (cxled->mode == CXL_DECODER_DEAD) {
1806		dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
1807		return -ENODEV;
1808	}
1809
1810	/* all full of members, or interleave config not established? */
1811	if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
1812		dev_dbg(&cxlr->dev, "region already active\n");
1813		return -EBUSY;
1814	} else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
1815		dev_dbg(&cxlr->dev, "interleave config missing\n");
1816		return -ENXIO;
1817	}
1818
1819	if (p->nr_targets >= p->interleave_ways) {
1820		dev_dbg(&cxlr->dev, "region already has %d endpoints\n",
1821			p->nr_targets);
1822		return -EINVAL;
1823	}
1824
1825	ep_port = cxled_to_port(cxled);
1826	root_port = cxlrd_to_port(cxlrd);
1827	dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
1828	if (!dport) {
1829		dev_dbg(&cxlr->dev, "%s:%s invalid target for %s\n",
1830			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1831			dev_name(cxlr->dev.parent));
1832		return -ENXIO;
1833	}
1834
1835	if (cxled->cxld.target_type != cxlr->type) {
1836		dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
1837			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1838			cxled->cxld.target_type, cxlr->type);
1839		return -ENXIO;
1840	}
1841
1842	if (!cxled->dpa_res) {
1843		dev_dbg(&cxlr->dev, "%s:%s: missing DPA allocation.\n",
1844			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev));
1845		return -ENXIO;
1846	}
1847
1848	if (resource_size(cxled->dpa_res) * p->interleave_ways !=
1849	    resource_size(p->res)) {
1850		dev_dbg(&cxlr->dev,
1851			"%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n",
1852			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1853			(u64)resource_size(cxled->dpa_res), p->interleave_ways,
1854			(u64)resource_size(p->res));
1855		return -EINVAL;
1856	}
1857
1858	cxl_region_perf_data_calculate(cxlr, cxled);
1859
1860	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
1861		int i;
1862
1863		rc = cxl_region_attach_auto(cxlr, cxled, pos);
1864		if (rc)
1865			return rc;
1866
1867		/* await more targets to arrive... */
1868		if (p->nr_targets < p->interleave_ways)
1869			return 0;
1870
1871		/*
1872		 * All targets are here, which implies all PCI enumeration that
1873		 * affects this region has been completed. Walk the topology to
1874		 * sort the devices into their relative region decode position.
1875		 */
1876		rc = cxl_region_sort_targets(cxlr);
1877		if (rc)
1878			return rc;
1879
1880		for (i = 0; i < p->nr_targets; i++) {
1881			cxled = p->targets[i];
1882			ep_port = cxled_to_port(cxled);
1883			dport = cxl_find_dport_by_dev(root_port,
1884						      ep_port->host_bridge);
1885			rc = cxl_region_attach_position(cxlr, cxlrd, cxled,
1886							dport, i);
1887			if (rc)
1888				return rc;
1889		}
1890
1891		rc = cxl_region_setup_targets(cxlr);
1892		if (rc)
1893			return rc;
1894
1895		/*
1896		 * If target setup succeeds in the autodiscovery case
1897		 * then the region is already committed.
1898		 */
1899		p->state = CXL_CONFIG_COMMIT;
1900
1901		return 0;
1902	}
1903
1904	rc = cxl_region_validate_position(cxlr, cxled, pos);
1905	if (rc)
1906		return rc;
1907
1908	rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos);
1909	if (rc)
1910		return rc;
1911
1912	p->targets[pos] = cxled;
1913	cxled->pos = pos;
1914	p->nr_targets++;
1915
1916	if (p->nr_targets == p->interleave_ways) {
1917		rc = cxl_region_setup_targets(cxlr);
1918		if (rc)
1919			return rc;
1920		p->state = CXL_CONFIG_ACTIVE;
1921	}
1922
1923	cxled->cxld.interleave_ways = p->interleave_ways;
1924	cxled->cxld.interleave_granularity = p->interleave_granularity;
1925	cxled->cxld.hpa_range = (struct range) {
1926		.start = p->res->start,
1927		.end = p->res->end,
1928	};
1929
1930	if (p->nr_targets != p->interleave_ways)
1931		return 0;
1932
1933	/*
1934	 * Test the auto-discovery position calculator function
1935	 * against this successfully created user-defined region.
1936	 * A fail message here means that this interleave config
1937	 * will fail when presented as CXL_REGION_F_AUTO.
1938	 */
1939	for (int i = 0; i < p->nr_targets; i++) {
1940		struct cxl_endpoint_decoder *cxled = p->targets[i];
1941		int test_pos;
1942
1943		test_pos = cxl_calc_interleave_pos(cxled);
1944		dev_dbg(&cxled->cxld.dev,
1945			"Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n",
1946			(test_pos == cxled->pos) ? "success" : "fail",
1947			test_pos, cxled->pos);
1948	}
1949
1950	return 0;
1951}
1952
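/*
 * Remove @cxled from its region: reset the HDM decode when the region state
 * is past CXL_CONFIG_ACTIVE, detach the decoder from every port between the
 * endpoint and the root, and release the region driver so the region is
 * observed as inactive. The caller must hold cxl_region_rwsem for write,
 * per the lockdep assertion below.
 */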
1953static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
1954{
1955	struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
1956	struct cxl_region *cxlr = cxled->cxld.region;
1957	struct cxl_region_params *p;
1958	int rc = 0;
1959
1960	lockdep_assert_held_write(&cxl_region_rwsem);
1961
1962	if (!cxlr)
1963		return 0;
1964
1965	p = &cxlr->params;
1966	get_device(&cxlr->dev);
1967
1968	if (p->state > CXL_CONFIG_ACTIVE) {
1969		/*
1970		 * TODO: tear down all impacted regions if a device is
1971		 * removed out of order
1972		 */
1973		rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
1974		if (rc)
1975			goto out;
1976		p->state = CXL_CONFIG_ACTIVE;
1977	}
1978
1979	for (iter = ep_port; !is_cxl_root(iter);
1980	     iter = to_cxl_port(iter->dev.parent))
1981		cxl_port_detach_region(iter, cxlr, cxled);
1982
1983	if (cxled->pos < 0 || cxled->pos >= p->interleave_ways ||
1984	    p->targets[cxled->pos] != cxled) {
1985		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1986
1987		dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
1988			      dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1989			      cxled->pos);
1990		goto out;
1991	}
1992
1993	if (p->state == CXL_CONFIG_ACTIVE) {
1994		p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
1995		cxl_region_teardown_targets(cxlr);
1996	}
1997	p->targets[cxled->pos] = NULL;
1998	p->nr_targets--;
1999	cxled->cxld.hpa_range = (struct range) {
2000		.start = 0,
2001		.end = -1,
2002	};
2003
2004	/* notify the region driver that one of its targets has departed */
2005	up_write(&cxl_region_rwsem);
2006	device_release_driver(&cxlr->dev);
2007	down_write(&cxl_region_rwsem);
2008out:
2009	put_device(&cxlr->dev);
2010	return rc;
2011}
2012
2013void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
2014{
2015	down_write(&cxl_region_rwsem);
2016	cxled->mode = CXL_DECODER_DEAD;
2017	cxl_region_detach(cxled);
2018	up_write(&cxl_region_rwsem);
2019}
2020
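/*
 * attach_target() is reached from two contexts with different blocking
 * semantics: sysfs writers (store_targetN()) pass TASK_INTERRUPTIBLE so a
 * fatal signal can abort waiting for cxl_region_rwsem, while the in-kernel
 * autodiscovery path (cxl_add_to_region()) passes TASK_UNINTERRUPTIBLE and
 * waits unconditionally.
 */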
2021static int attach_target(struct cxl_region *cxlr,
2022			 struct cxl_endpoint_decoder *cxled, int pos,
2023			 unsigned int state)
2024{
2025	int rc = 0;
2026
2027	if (state == TASK_INTERRUPTIBLE)
2028		rc = down_write_killable(&cxl_region_rwsem);
2029	else
2030		down_write(&cxl_region_rwsem);
2031	if (rc)
2032		return rc;
2033
2034	down_read(&cxl_dpa_rwsem);
2035	rc = cxl_region_attach(cxlr, cxled, pos);
2036	up_read(&cxl_dpa_rwsem);
2037	up_write(&cxl_region_rwsem);
2038	return rc;
2039}
2040
2041static int detach_target(struct cxl_region *cxlr, int pos)
2042{
2043	struct cxl_region_params *p = &cxlr->params;
2044	int rc;
2045
2046	rc = down_write_killable(&cxl_region_rwsem);
2047	if (rc)
2048		return rc;
2049
2050	if (pos >= p->interleave_ways) {
2051		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
2052			p->interleave_ways);
2053		rc = -ENXIO;
2054		goto out;
2055	}
2056
2057	if (!p->targets[pos]) {
2058		rc = 0;
2059		goto out;
2060	}
2061
2062	rc = cxl_region_detach(p->targets[pos]);
2063out:
2064	up_write(&cxl_region_rwsem);
2065	return rc;
2066}
2067
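/*
 * The region's targetN sysfs attributes (wired up via TARGET_ATTR_RW()
 * below) populate interleave positions. Illustrative shell session, with
 * device names that depend on the platform topology:
 *
 *   # echo decoder3.0 > /sys/bus/cxl/devices/region0/target0
 *   # echo "" > /sys/bus/cxl/devices/region0/target0
 *
 * Writing an endpoint decoder name attaches it at that position; an empty
 * write detaches whatever currently occupies the position.
 */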
2068static ssize_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
2069			    size_t len)
2070{
2071	int rc;
2072
2073	if (sysfs_streq(buf, "\n"))
2074		rc = detach_target(cxlr, pos);
2075	else {
2076		struct device *dev;
2077
2078		dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf);
2079		if (!dev)
2080			return -ENODEV;
2081
2082		if (!is_endpoint_decoder(dev)) {
2083			rc = -EINVAL;
2084			goto out;
2085		}
2086
2087		rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
2088				   TASK_INTERRUPTIBLE);
2089out:
2090		put_device(dev);
2091	}
2092
2093	if (rc < 0)
2094		return rc;
2095	return len;
2096}
2097
2098#define TARGET_ATTR_RW(n)                                              \
2099static ssize_t target##n##_show(                                       \
2100	struct device *dev, struct device_attribute *attr, char *buf)  \
2101{                                                                      \
2102	return show_targetN(to_cxl_region(dev), buf, (n));             \
2103}                                                                      \
2104static ssize_t target##n##_store(struct device *dev,                   \
2105				 struct device_attribute *attr,        \
2106				 const char *buf, size_t len)          \
2107{                                                                      \
2108	return store_targetN(to_cxl_region(dev), buf, (n), len);       \
2109}                                                                      \
2110static DEVICE_ATTR_RW(target##n)
2111
2112TARGET_ATTR_RW(0);
2113TARGET_ATTR_RW(1);
2114TARGET_ATTR_RW(2);
2115TARGET_ATTR_RW(3);
2116TARGET_ATTR_RW(4);
2117TARGET_ATTR_RW(5);
2118TARGET_ATTR_RW(6);
2119TARGET_ATTR_RW(7);
2120TARGET_ATTR_RW(8);
2121TARGET_ATTR_RW(9);
2122TARGET_ATTR_RW(10);
2123TARGET_ATTR_RW(11);
2124TARGET_ATTR_RW(12);
2125TARGET_ATTR_RW(13);
2126TARGET_ATTR_RW(14);
2127TARGET_ATTR_RW(15);
2128
2129static struct attribute *target_attrs[] = {
2130	&dev_attr_target0.attr,
2131	&dev_attr_target1.attr,
2132	&dev_attr_target2.attr,
2133	&dev_attr_target3.attr,
2134	&dev_attr_target4.attr,
2135	&dev_attr_target5.attr,
2136	&dev_attr_target6.attr,
2137	&dev_attr_target7.attr,
2138	&dev_attr_target8.attr,
2139	&dev_attr_target9.attr,
2140	&dev_attr_target10.attr,
2141	&dev_attr_target11.attr,
2142	&dev_attr_target12.attr,
2143	&dev_attr_target13.attr,
2144	&dev_attr_target14.attr,
2145	&dev_attr_target15.attr,
2146	NULL,
2147};
2148
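/*
 * Only expose as many targetN attributes as the region has interleave
 * ways; higher positions are hidden from sysfs.
 */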
2149static umode_t cxl_region_target_visible(struct kobject *kobj,
2150					 struct attribute *a, int n)
2151{
2152	struct device *dev = kobj_to_dev(kobj);
2153	struct cxl_region *cxlr = to_cxl_region(dev);
2154	struct cxl_region_params *p = &cxlr->params;
2155
2156	if (n < p->interleave_ways)
2157		return a->mode;
2158	return 0;
2159}
2160
2161static const struct attribute_group cxl_region_target_group = {
2162	.attrs = target_attrs,
2163	.is_visible = cxl_region_target_visible,
2164};
2165
2166static const struct attribute_group *get_cxl_region_target_group(void)
2167{
2168	return &cxl_region_target_group;
2169}
2170
2171static const struct attribute_group *region_groups[] = {
2172	&cxl_base_attribute_group,
2173	&cxl_region_group,
2174	&cxl_region_target_group,
2175	&cxl_region_access0_coordinate_group,
2176	&cxl_region_access1_coordinate_group,
2177	NULL,
2178};
2179
2180static void cxl_region_release(struct device *dev)
2181{
2182	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
2183	struct cxl_region *cxlr = to_cxl_region(dev);
2184	int id = atomic_read(&cxlrd->region_id);
2185
2186	/*
2187	 * Try to reuse the recently idled id rather than the cached
2188	 * next id to prevent the region id space from increasing
2189	 * unnecessarily.
2190	 */
2191	if (cxlr->id < id)
2192		if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) {
2193			memregion_free(id);
2194			goto out;
2195		}
2196
2197	memregion_free(cxlr->id);
2198out:
2199	put_device(dev->parent);
2200	kfree(cxlr);
2201}
2202
2203const struct device_type cxl_region_type = {
2204	.name = "cxl_region",
2205	.release = cxl_region_release,
2206	.groups = region_groups
2207};
2208
2209bool is_cxl_region(struct device *dev)
2210{
2211	return dev->type == &cxl_region_type;
2212}
2213EXPORT_SYMBOL_NS_GPL(is_cxl_region, CXL);
2214
2215static struct cxl_region *to_cxl_region(struct device *dev)
2216{
2217	if (dev_WARN_ONCE(dev, dev->type != &cxl_region_type,
2218			  "not a cxl_region device\n"))
2219		return NULL;
2220
2221	return container_of(dev, struct cxl_region, dev);
2222}
2223
2224static void unregister_region(void *_cxlr)
2225{
2226	struct cxl_region *cxlr = _cxlr;
2227	struct cxl_region_params *p = &cxlr->params;
2228	int i;
2229
2230	unregister_memory_notifier(&cxlr->memory_notifier);
2231	device_del(&cxlr->dev);
2232
2233	/*
2234	 * Now that region sysfs is shut down, the parameter block is
2235	 * read-only, so there is no need to hold the region rwsem to
2236	 * access the region parameters.
2237	 */
2238	for (i = 0; i < p->interleave_ways; i++)
2239		detach_target(cxlr, i);
2240
2241	cxl_region_iomem_release(cxlr);
2242	put_device(&cxlr->dev);
2243}
2244
2245static struct lock_class_key cxl_region_key;
2246
2247static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id)
2248{
2249	struct cxl_region *cxlr;
2250	struct device *dev;
2251
2252	cxlr = kzalloc(sizeof(*cxlr), GFP_KERNEL);
2253	if (!cxlr) {
2254		memregion_free(id);
2255		return ERR_PTR(-ENOMEM);
2256	}
2257
2258	dev = &cxlr->dev;
2259	device_initialize(dev);
2260	lockdep_set_class(&dev->mutex, &cxl_region_key);
2261	dev->parent = &cxlrd->cxlsd.cxld.dev;
2262	/*
2263	 * Keep the root decoder pinned through cxl_region_release() to fix up
2264	 * region id allocations
2265	 */
2266	get_device(dev->parent);
2267	device_set_pm_not_required(dev);
2268	dev->bus = &cxl_bus_type;
2269	dev->type = &cxl_region_type;
2270	cxlr->id = id;
2271
2272	return cxlr;
2273}
2274
2275static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
2276{
2277	int cset = 0;
2278	int rc;
2279
2280	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
2281		if (cxlr->coord[i].read_bandwidth) {
2282			rc = 0;
2283			if (cxl_need_node_perf_attrs_update(nid))
2284				node_set_perf_attrs(nid, &cxlr->coord[i], i);
2285			else
2286				rc = cxl_update_hmat_access_coordinates(nid, cxlr, i);
2287
2288			if (rc == 0)
2289				cset++;
2290		}
2291	}
2292
2293	if (!cset)
2294		return false;
2295
2296	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access0_group());
2297	if (rc)
2298		dev_dbg(&cxlr->dev, "Failed to update access0 group\n");
2299
2300	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access1_group());
2301	if (rc)
2302		dev_dbg(&cxlr->dev, "Failed to update access1 group\n");
2303
2304	return true;
2305}
2306
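/*
 * Memory hotplug notifier registered in devm_cxl_add_region(): when memory
 * backed by this region comes online (MEM_ONLINE on the region's target
 * node), push the cached access coordinates to the node and refresh the
 * access0/access1 sysfs groups via cxl_region_update_coordinates().
 */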
2307static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
2308					  unsigned long action, void *arg)
2309{
2310	struct cxl_region *cxlr = container_of(nb, struct cxl_region,
2311					       memory_notifier);
2312	struct cxl_region_params *p = &cxlr->params;
2313	struct cxl_endpoint_decoder *cxled = p->targets[0];
2314	struct cxl_decoder *cxld = &cxled->cxld;
2315	struct memory_notify *mnb = arg;
2316	int nid = mnb->status_change_nid;
2317	int region_nid;
2318
2319	if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
2320		return NOTIFY_DONE;
2321
2322	region_nid = phys_to_target_node(cxld->hpa_range.start);
2323	if (nid != region_nid)
2324		return NOTIFY_DONE;
2325
2326	if (!cxl_region_update_coordinates(cxlr, nid))
2327		return NOTIFY_DONE;
2328
2329	return NOTIFY_OK;
2330}
2331
2332/**
2333 * devm_cxl_add_region - Adds a region to a decoder
2334 * @cxlrd: root decoder
2335 * @id: memregion id to create; released via memregion_free() on failure
2336 * @mode: mode for the endpoint decoders of this region
2337 * @type: select whether this is an expander or accelerator (type-2 or type-3)
2338 *
2339 * This is the second step of region initialization. Regions exist within an
2340 * address space which is mapped by a @cxlrd.
2341 *
2342 * Return: a new &struct cxl_region on success, or an ERR_PTR() on failure. The
2343 * region will be named "regionZ" where Z is the unique region number.
2344 */
2345static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
2346					      int id,
2347					      enum cxl_decoder_mode mode,
2348					      enum cxl_decoder_type type)
2349{
2350	struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
2351	struct cxl_region *cxlr;
2352	struct device *dev;
2353	int rc;
2354
2355	switch (mode) {
2356	case CXL_DECODER_RAM:
2357	case CXL_DECODER_PMEM:
2358		break;
2359	default:
2360		dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
2361		return ERR_PTR(-EINVAL);
2362	}
2363
2364	cxlr = cxl_region_alloc(cxlrd, id);
2365	if (IS_ERR(cxlr))
2366		return cxlr;
2367	cxlr->mode = mode;
2368	cxlr->type = type;
2369
2370	dev = &cxlr->dev;
2371	rc = dev_set_name(dev, "region%d", id);
2372	if (rc)
2373		goto err;
2374
2375	rc = device_add(dev);
2376	if (rc)
2377		goto err;
2378
2379	cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
2380	cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
2381	register_memory_notifier(&cxlr->memory_notifier);
2382
2383	rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
2384	if (rc)
2385		return ERR_PTR(rc);
2386
2387	dev_dbg(port->uport_dev, "%s: created %s\n",
2388		dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev));
2389	return cxlr;
2390
2391err:
2392	put_device(dev);
2393	return ERR_PTR(rc);
2394}
2395
2396static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf)
2397{
2398	return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
2399}
2400
2401static ssize_t create_pmem_region_show(struct device *dev,
2402				       struct device_attribute *attr, char *buf)
2403{
2404	return __create_region_show(to_cxl_root_decoder(dev), buf);
2405}
2406
2407static ssize_t create_ram_region_show(struct device *dev,
2408				      struct device_attribute *attr, char *buf)
2409{
2410	return __create_region_show(to_cxl_root_decoder(dev), buf);
2411}
2412
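/*
 * Region creation flow for the create_{pmem,ram}_region attributes. The
 * root decoder advertises the next available region name, and writing that
 * exact name back claims it; a stale name loses the atomic_cmpxchg() race
 * below and fails with -EBUSY. Illustrative shell session (device names
 * are examples only):
 *
 *   # cat /sys/bus/cxl/devices/decoder0.0/create_pmem_region
 *   region0
 *   # echo region0 > /sys/bus/cxl/devices/decoder0.0/create_pmem_region
 */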
2413static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
2414					  enum cxl_decoder_mode mode, int id)
2415{
2416	int rc;
2417
2418	rc = memregion_alloc(GFP_KERNEL);
2419	if (rc < 0)
2420		return ERR_PTR(rc);
2421
2422	if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
2423		memregion_free(rc);
2424		return ERR_PTR(-EBUSY);
2425	}
2426
2427	return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
2428}
2429
2430static ssize_t create_pmem_region_store(struct device *dev,
2431					struct device_attribute *attr,
2432					const char *buf, size_t len)
2433{
2434	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2435	struct cxl_region *cxlr;
2436	int rc, id;
2437
2438	rc = sscanf(buf, "region%d\n", &id);
2439	if (rc != 1)
2440		return -EINVAL;
2441
2442	cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id);
2443	if (IS_ERR(cxlr))
2444		return PTR_ERR(cxlr);
2445
2446	return len;
2447}
2448DEVICE_ATTR_RW(create_pmem_region);
2449
2450static ssize_t create_ram_region_store(struct device *dev,
2451				       struct device_attribute *attr,
2452				       const char *buf, size_t len)
2453{
2454	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2455	struct cxl_region *cxlr;
2456	int rc, id;
2457
2458	rc = sscanf(buf, "region%d\n", &id);
2459	if (rc != 1)
2460		return -EINVAL;
2461
2462	cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id);
2463	if (IS_ERR(cxlr))
2464		return PTR_ERR(cxlr);
2465
2466	return len;
2467}
2468DEVICE_ATTR_RW(create_ram_region);
2469
2470static ssize_t region_show(struct device *dev, struct device_attribute *attr,
2471			   char *buf)
2472{
2473	struct cxl_decoder *cxld = to_cxl_decoder(dev);
2474	ssize_t rc;
2475
2476	rc = down_read_interruptible(&cxl_region_rwsem);
2477	if (rc)
2478		return rc;
2479
2480	if (cxld->region)
2481		rc = sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
2482	else
2483		rc = sysfs_emit(buf, "\n");
2484	up_read(&cxl_region_rwsem);
2485
2486	return rc;
2487}
2488DEVICE_ATTR_RO(region);
2489
2490static struct cxl_region *
2491cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
2492{
2493	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
2494	struct device *region_dev;
2495
2496	region_dev = device_find_child_by_name(&cxld->dev, name);
2497	if (!region_dev)
2498		return ERR_PTR(-ENODEV);
2499
2500	return to_cxl_region(region_dev);
2501}
2502
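/*
 * Regions are deleted through the same root decoder that created them,
 * e.g. (illustrative device names):
 *
 *   # echo region0 > /sys/bus/cxl/devices/decoder0.0/delete_region
 */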
2503static ssize_t delete_region_store(struct device *dev,
2504				   struct device_attribute *attr,
2505				   const char *buf, size_t len)
2506{
2507	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
2508	struct cxl_port *port = to_cxl_port(dev->parent);
2509	struct cxl_region *cxlr;
2510
2511	cxlr = cxl_find_region_by_name(cxlrd, buf);
2512	if (IS_ERR(cxlr))
2513		return PTR_ERR(cxlr);
2514
2515	devm_release_action(port->uport_dev, unregister_region, cxlr);
2516	put_device(&cxlr->dev);
2517
2518	return len;
2519}
2520DEVICE_ATTR_WO(delete_region);
2521
2522static void cxl_pmem_region_release(struct device *dev)
2523{
2524	struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
2525	int i;
2526
2527	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
2528		struct cxl_memdev *cxlmd = cxlr_pmem->mapping[i].cxlmd;
2529
2530		put_device(&cxlmd->dev);
2531	}
2532
2533	kfree(cxlr_pmem);
2534}
2535
2536static const struct attribute_group *cxl_pmem_region_attribute_groups[] = {
2537	&cxl_base_attribute_group,
2538	NULL,
2539};
2540
2541const struct device_type cxl_pmem_region_type = {
2542	.name = "cxl_pmem_region",
2543	.release = cxl_pmem_region_release,
2544	.groups = cxl_pmem_region_attribute_groups,
2545};
2546
2547bool is_cxl_pmem_region(struct device *dev)
2548{
2549	return dev->type == &cxl_pmem_region_type;
2550}
2551EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, CXL);
2552
2553struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
2554{
2555	if (dev_WARN_ONCE(dev, !is_cxl_pmem_region(dev),
2556			  "not a cxl_pmem_region device\n"))
2557		return NULL;
2558	return container_of(dev, struct cxl_pmem_region, dev);
2559}
2560EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL);
2561
2562struct cxl_poison_context {
2563	struct cxl_port *port;
2564	enum cxl_decoder_mode mode;
2565	u64 offset;
2566};
2567
2568static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd,
2569				   struct cxl_poison_context *ctx)
2570{
2571	struct cxl_dev_state *cxlds = cxlmd->cxlds;
2572	u64 offset, length;
2573	int rc = 0;
2574
2575	/*
2576	 * Collect poison for the remaining unmapped resources
2577	 * after poison is collected by committed endpoints.
2578	 *
2579	 * Knowing that PMEM must always follow RAM, get poison
2580	 * for unmapped resources based on the last decoder's mode:
2581	 *	ram: scan remains of ram range, then any pmem range
2582	 *	pmem: scan remains of pmem range
2583	 */
2584
2585	if (ctx->mode == CXL_DECODER_RAM) {
2586		offset = ctx->offset;
2587		length = resource_size(&cxlds->ram_res) - offset;
2588		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
2589		if (rc == -EFAULT)
2590			rc = 0;
2591		if (rc)
2592			return rc;
2593	}
2594	if (ctx->mode == CXL_DECODER_PMEM) {
2595		offset = ctx->offset;
2596		length = resource_size(&cxlds->dpa_res) - offset;
2597		if (!length)
2598			return 0;
2599	} else if (resource_size(&cxlds->pmem_res)) {
2600		offset = cxlds->pmem_res.start;
2601		length = resource_size(&cxlds->pmem_res);
2602	} else {
2603		return 0;
2604	}
2605
2606	return cxl_mem_get_poison(cxlmd, offset, length, NULL);
2607}
2608
2609static int poison_by_decoder(struct device *dev, void *arg)
2610{
2611	struct cxl_poison_context *ctx = arg;
2612	struct cxl_endpoint_decoder *cxled;
2613	struct cxl_memdev *cxlmd;
2614	u64 offset, length;
2615	int rc = 0;
2616
2617	if (!is_endpoint_decoder(dev))
2618		return rc;
2619
2620	cxled = to_cxl_endpoint_decoder(dev);
2621	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
2622		return rc;
2623
2624	/*
2625	 * Regions are only created with single mode decoders: pmem or ram.
2626	 * Linux does not support mixed mode decoders. This means that
2627	 * reading poison per endpoint decoder adheres to the requirement
2628	 * that poison reads of pmem and ram must be separated.
2629	 * CXL 3.0 Spec 8.2.9.8.4.1
2630	 */
2631	if (cxled->mode == CXL_DECODER_MIXED) {
2632		dev_dbg(dev, "poison list read unsupported in mixed mode\n");
2633		return rc;
2634	}
2635
2636	cxlmd = cxled_to_memdev(cxled);
2637	if (cxled->skip) {
2638		offset = cxled->dpa_res->start - cxled->skip;
2639		length = cxled->skip;
2640		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
2641		if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
2642			rc = 0;
2643		if (rc)
2644			return rc;
2645	}
2646
2647	offset = cxled->dpa_res->start;
2648	length = cxled->dpa_res->end - offset + 1;
2649	rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
2650	if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
2651		rc = 0;
2652	if (rc)
2653		return rc;
2654
2655	/* Iterate until commit_end is reached */
2656	if (cxled->cxld.id == ctx->port->commit_end) {
2657		ctx->offset = cxled->dpa_res->end + 1;
2658		ctx->mode = cxled->mode;
2659		return 1;
2660	}
2661
2662	return 0;
2663}
2664
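/*
 * Walk the port's endpoint decoders and read poison for their skipped and
 * mapped DPA. poison_by_decoder() returns 1 once the last committed decoder
 * (port->commit_end) has been visited, at which point
 * cxl_get_poison_unmapped() covers the remaining unmapped capacity. This is
 * expected to be driven from the memdev poison-list trigger path, e.g.
 * (illustrative): echo 1 > /sys/bus/cxl/devices/mem0/trigger_poison_list
 */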
2665int cxl_get_poison_by_endpoint(struct cxl_port *port)
2666{
2667	struct cxl_poison_context ctx;
2668	int rc = 0;
2669
2670	ctx = (struct cxl_poison_context) {
2671		.port = port
2672	};
2673
2674	rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder);
2675	if (rc == 1)
2676		rc = cxl_get_poison_unmapped(to_cxl_memdev(port->uport_dev),
2677					     &ctx);
2678
2679	return rc;
2680}
2681
2682static struct lock_class_key cxl_pmem_region_key;
2683
2684static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
2685{
2686	struct cxl_region_params *p = &cxlr->params;
2687	struct cxl_nvdimm_bridge *cxl_nvb;
2688	struct cxl_pmem_region *cxlr_pmem;
2689	struct device *dev;
2690	int i;
2691
2692	down_read(&cxl_region_rwsem);
2693	if (p->state != CXL_CONFIG_COMMIT) {
2694		cxlr_pmem = ERR_PTR(-ENXIO);
2695		goto out;
2696	}
2697
2698	cxlr_pmem = kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets),
2699			    GFP_KERNEL);
2700	if (!cxlr_pmem) {
2701		cxlr_pmem = ERR_PTR(-ENOMEM);
2702		goto out;
2703	}
2704
2705	cxlr_pmem->hpa_range.start = p->res->start;
2706	cxlr_pmem->hpa_range.end = p->res->end;
2707
2708	/* Snapshot the region configuration underneath the cxl_region_rwsem */
2709	cxlr_pmem->nr_mappings = p->nr_targets;
2710	for (i = 0; i < p->nr_targets; i++) {
2711		struct cxl_endpoint_decoder *cxled = p->targets[i];
2712		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
2713		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
2714
2715		/*
2716		 * Regions never span CXL root devices, so by definition the
2717		 * bridge for one device is the same for all.
2718		 */
2719		if (i == 0) {
2720			cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
2721			if (!cxl_nvb) {
2722				cxlr_pmem = ERR_PTR(-ENODEV);
2723				goto out;
2724			}
2725			cxlr->cxl_nvb = cxl_nvb;
2726		}
2727		m->cxlmd = cxlmd;
2728		get_device(&cxlmd->dev);
2729		m->start = cxled->dpa_res->start;
2730		m->size = resource_size(cxled->dpa_res);
2731		m->position = i;
2732	}
2733
2734	dev = &cxlr_pmem->dev;
2735	cxlr_pmem->cxlr = cxlr;
2736	cxlr->cxlr_pmem = cxlr_pmem;
2737	device_initialize(dev);
2738	lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
2739	device_set_pm_not_required(dev);
2740	dev->parent = &cxlr->dev;
2741	dev->bus = &cxl_bus_type;
2742	dev->type = &cxl_pmem_region_type;
2743out:
2744	up_read(&cxl_region_rwsem);
2745
2746	return cxlr_pmem;
2747}
2748
2749static void cxl_dax_region_release(struct device *dev)
2750{
2751	struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);
2752
2753	kfree(cxlr_dax);
2754}
2755
2756static const struct attribute_group *cxl_dax_region_attribute_groups[] = {
2757	&cxl_base_attribute_group,
2758	NULL,
2759};
2760
2761const struct device_type cxl_dax_region_type = {
2762	.name = "cxl_dax_region",
2763	.release = cxl_dax_region_release,
2764	.groups = cxl_dax_region_attribute_groups,
2765};
2766
2767static bool is_cxl_dax_region(struct device *dev)
2768{
2769	return dev->type == &cxl_dax_region_type;
2770}
2771
2772struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
2773{
2774	if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev),
2775			  "not a cxl_dax_region device\n"))
2776		return NULL;
2777	return container_of(dev, struct cxl_dax_region, dev);
2778}
2779EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL);
2780
2781static struct lock_class_key cxl_dax_region_key;
2782
2783static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
2784{
2785	struct cxl_region_params *p = &cxlr->params;
2786	struct cxl_dax_region *cxlr_dax;
2787	struct device *dev;
2788
2789	down_read(&cxl_region_rwsem);
2790	if (p->state != CXL_CONFIG_COMMIT) {
2791		cxlr_dax = ERR_PTR(-ENXIO);
2792		goto out;
2793	}
2794
2795	cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
2796	if (!cxlr_dax) {
2797		cxlr_dax = ERR_PTR(-ENOMEM);
2798		goto out;
2799	}
2800
2801	cxlr_dax->hpa_range.start = p->res->start;
2802	cxlr_dax->hpa_range.end = p->res->end;
2803
2804	dev = &cxlr_dax->dev;
2805	cxlr_dax->cxlr = cxlr;
2806	device_initialize(dev);
2807	lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
2808	device_set_pm_not_required(dev);
2809	dev->parent = &cxlr->dev;
2810	dev->bus = &cxl_bus_type;
2811	dev->type = &cxl_dax_region_type;
2812out:
2813	up_read(&cxl_region_rwsem);
2814
2815	return cxlr_dax;
2816}
2817
2818static void cxlr_pmem_unregister(void *_cxlr_pmem)
2819{
2820	struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
2821	struct cxl_region *cxlr = cxlr_pmem->cxlr;
2822	struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
2823
2824	/*
2825	 * Either the bridge is in ->remove() context under the device_lock(),
2826	 * or cxlr_release_nvdimm() is cancelling the bridge's release action
2827	 * for @cxlr_pmem and doing it itself (while manually holding the bridge
2828	 * lock).
2829	 */
2830	device_lock_assert(&cxl_nvb->dev);
2831	cxlr->cxlr_pmem = NULL;
2832	cxlr_pmem->cxlr = NULL;
2833	device_unregister(&cxlr_pmem->dev);
2834}
2835
2836static void cxlr_release_nvdimm(void *_cxlr)
2837{
2838	struct cxl_region *cxlr = _cxlr;
2839	struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb;
2840
2841	device_lock(&cxl_nvb->dev);
2842	if (cxlr->cxlr_pmem)
2843		devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister,
2844				    cxlr->cxlr_pmem);
2845	device_unlock(&cxl_nvb->dev);
2846	cxlr->cxl_nvb = NULL;
2847	put_device(&cxl_nvb->dev);
2848}
2849
2850/**
2851 * devm_cxl_add_pmem_region() - add a cxl_region-to-nd_region bridge
2852 * @cxlr: parent CXL region for this pmem region bridge device
2853 *
2854 * Return: 0 on success, negative error code on failure.
2855 */
2856static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
2857{
2858	struct cxl_pmem_region *cxlr_pmem;
2859	struct cxl_nvdimm_bridge *cxl_nvb;
2860	struct device *dev;
2861	int rc;
2862
2863	cxlr_pmem = cxl_pmem_region_alloc(cxlr);
2864	if (IS_ERR(cxlr_pmem))
2865		return PTR_ERR(cxlr_pmem);
2866	cxl_nvb = cxlr->cxl_nvb;
2867
2868	dev = &cxlr_pmem->dev;
2869	rc = dev_set_name(dev, "pmem_region%d", cxlr->id);
2870	if (rc)
2871		goto err;
2872
2873	rc = device_add(dev);
2874	if (rc)
2875		goto err;
2876
2877	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
2878		dev_name(dev));
2879
2880	device_lock(&cxl_nvb->dev);
2881	if (cxl_nvb->dev.driver)
2882		rc = devm_add_action_or_reset(&cxl_nvb->dev,
2883					      cxlr_pmem_unregister, cxlr_pmem);
2884	else
2885		rc = -ENXIO;
2886	device_unlock(&cxl_nvb->dev);
2887
2888	if (rc)
2889		goto err_bridge;
2890
2891	/* @cxlr carries a reference on @cxl_nvb until cxlr_release_nvdimm */
2892	return devm_add_action_or_reset(&cxlr->dev, cxlr_release_nvdimm, cxlr);
2893
2894err:
2895	put_device(dev);
2896err_bridge:
2897	put_device(&cxl_nvb->dev);
2898	cxlr->cxl_nvb = NULL;
2899	return rc;
2900}
2901
2902static void cxlr_dax_unregister(void *_cxlr_dax)
2903{
2904	struct cxl_dax_region *cxlr_dax = _cxlr_dax;
2905
2906	device_unregister(&cxlr_dax->dev);
2907}
2908
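/*
 * Publish a committed ram-mode region as a cxl_dax_region child device,
 * intended to be claimed by a dax provider driver outside this file; the
 * region owns the device and unregisters it via the devm action below.
 */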
2909static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
2910{
2911	struct cxl_dax_region *cxlr_dax;
2912	struct device *dev;
2913	int rc;
2914
2915	cxlr_dax = cxl_dax_region_alloc(cxlr);
2916	if (IS_ERR(cxlr_dax))
2917		return PTR_ERR(cxlr_dax);
2918
2919	dev = &cxlr_dax->dev;
2920	rc = dev_set_name(dev, "dax_region%d", cxlr->id);
2921	if (rc)
2922		goto err;
2923
2924	rc = device_add(dev);
2925	if (rc)
2926		goto err;
2927
2928	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
2929		dev_name(dev));
2930
2931	return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
2932					cxlr_dax);
2933err:
2934	put_device(dev);
2935	return rc;
2936}
2937
2938static int match_root_decoder_by_range(struct device *dev, void *data)
2939{
2940	struct range *r1, *r2 = data;
2941	struct cxl_root_decoder *cxlrd;
2942
2943	if (!is_root_decoder(dev))
2944		return 0;
2945
2946	cxlrd = to_cxl_root_decoder(dev);
2947	r1 = &cxlrd->cxlsd.cxld.hpa_range;
2948	return range_contains(r1, r2);
2949}
2950
2951static int match_region_by_range(struct device *dev, void *data)
2952{
2953	struct cxl_region_params *p;
2954	struct cxl_region *cxlr;
2955	struct range *r = data;
2956	int rc = 0;
2957
2958	if (!is_cxl_region(dev))
2959		return 0;
2960
2961	cxlr = to_cxl_region(dev);
2962	p = &cxlr->params;
2963
2964	down_read(&cxl_region_rwsem);
2965	if (p->res && p->res->start == r->start && p->res->end == r->end)
2966		rc = 1;
2967	up_read(&cxl_region_rwsem);
2968
2969	return rc;
2970}
2971
2972/* Establish an empty region covering the given HPA range */
2973static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
2974					   struct cxl_endpoint_decoder *cxled)
2975{
2976	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
2977	struct cxl_port *port = cxlrd_to_port(cxlrd);
2978	struct range *hpa = &cxled->cxld.hpa_range;
2979	struct cxl_region_params *p;
2980	struct cxl_region *cxlr;
2981	struct resource *res;
2982	int rc;
2983
2984	do {
2985		cxlr = __create_region(cxlrd, cxled->mode,
2986				       atomic_read(&cxlrd->region_id));
2987	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
2988
2989	if (IS_ERR(cxlr)) {
2990		dev_err(cxlmd->dev.parent,
2991			"%s:%s: %s failed assign region: %ld\n",
2992			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
2993			__func__, PTR_ERR(cxlr));
2994		return cxlr;
2995	}
2996
2997	down_write(&cxl_region_rwsem);
2998	p = &cxlr->params;
2999	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
3000		dev_err(cxlmd->dev.parent,
3001			"%s:%s: %s autodiscovery interrupted\n",
3002			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3003			__func__);
3004		rc = -EBUSY;
3005		goto err;
3006	}
3007
3008	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
3009
3010	res = kmalloc(sizeof(*res), GFP_KERNEL);
3011	if (!res) {
3012		rc = -ENOMEM;
3013		goto err;
3014	}
3015
3016	*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
3017				    dev_name(&cxlr->dev));
3018	rc = insert_resource(cxlrd->res, res);
3019	if (rc) {
3020		/*
3021		 * Platform firmware may not have split resources like "System
3022		 * RAM" on CXL window boundaries; see cxl_region_iomem_release().
3023		 */
3024		dev_warn(cxlmd->dev.parent,
3025			 "%s:%s: %s %s cannot insert resource\n",
3026			 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
3027			 __func__, dev_name(&cxlr->dev));
3028	}
3029
3030	p->res = res;
3031	p->interleave_ways = cxled->cxld.interleave_ways;
3032	p->interleave_granularity = cxled->cxld.interleave_granularity;
3033	p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
3034
3035	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
3036	if (rc)
3037		goto err;
3038
3039	dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
3040		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
3041		dev_name(&cxlr->dev), p->res, p->interleave_ways,
3042		p->interleave_granularity);
3043
3044	/* ...to match put_device() in cxl_add_to_region() */
3045	get_device(&cxlr->dev);
3046	up_write(&cxl_region_rwsem);
3047
3048	return cxlr;
3049
3050err:
3051	up_write(&cxl_region_rwsem);
3052	devm_release_action(port->uport_dev, unregister_region, cxlr);
3053	return ERR_PTR(rc);
3054}
3055
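/*
 * Attach an endpoint decoder that platform firmware has already programmed:
 * locate the root decoder whose window contains the decoder's HPA range,
 * find or construct the matching region, attach the decoder, and bind the
 * region driver once all targets have arrived (CXL_CONFIG_COMMIT).
 */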
3056int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
3057{
3058	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
3059	struct range *hpa = &cxled->cxld.hpa_range;
3060	struct cxl_decoder *cxld = &cxled->cxld;
3061	struct device *cxlrd_dev, *region_dev;
3062	struct cxl_root_decoder *cxlrd;
3063	struct cxl_region_params *p;
3064	struct cxl_region *cxlr;
3065	bool attach = false;
3066	int rc;
3067
3068	cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range,
3069				      match_root_decoder_by_range);
3070	if (!cxlrd_dev) {
3071		dev_err(cxlmd->dev.parent,
3072			"%s:%s no CXL window for range %#llx:%#llx\n",
3073			dev_name(&cxlmd->dev), dev_name(&cxld->dev),
3074			cxld->hpa_range.start, cxld->hpa_range.end);
3075		return -ENXIO;
3076	}
3077
3078	cxlrd = to_cxl_root_decoder(cxlrd_dev);
3079
3080	/*
3081	 * Ensure that if multiple threads race to construct_region() for @hpa
3082	 * one does the construction and the others add to that.
3083	 */
3084	mutex_lock(&cxlrd->range_lock);
3085	region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
3086				       match_region_by_range);
3087	if (!region_dev) {
3088		cxlr = construct_region(cxlrd, cxled);
3089		region_dev = &cxlr->dev;
3090	} else
3091		cxlr = to_cxl_region(region_dev);
3092	mutex_unlock(&cxlrd->range_lock);
3093
3094	rc = PTR_ERR_OR_ZERO(cxlr);
3095	if (rc)
3096		goto out;
3097
3098	attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
3099
3100	down_read(&cxl_region_rwsem);
3101	p = &cxlr->params;
3102	attach = p->state == CXL_CONFIG_COMMIT;
3103	up_read(&cxl_region_rwsem);
3104
3105	if (attach) {
3106		/*
3107		 * If device_attach() fails the range may still be active via
3108		 * the platform-firmware memory map; otherwise the driver for
3109		 * regions is local to this file, so driver matching can't fail.
3110		 */
3111		if (device_attach(&cxlr->dev) < 0)
3112			dev_err(&cxlr->dev, "failed to enable, range: %pr\n",
3113				p->res);
3114	}
3115
3116	put_device(region_dev);
3117out:
3118	put_device(cxlrd_dev);
3119	return rc;
3120}
3121EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL);
3122
3123static int is_system_ram(struct resource *res, void *arg)
3124{
3125	struct cxl_region *cxlr = arg;
3126	struct cxl_region_params *p = &cxlr->params;
3127
3128	dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
3129	return 1;
3130}
3131
3132static int cxl_region_probe(struct device *dev)
3133{
3134	struct cxl_region *cxlr = to_cxl_region(dev);
3135	struct cxl_region_params *p = &cxlr->params;
3136	int rc;
3137
3138	rc = down_read_interruptible(&cxl_region_rwsem);
3139	if (rc) {
3140		dev_dbg(&cxlr->dev, "probe interrupted\n");
3141		return rc;
3142	}
3143
3144	if (p->state < CXL_CONFIG_COMMIT) {
3145		dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
3146		rc = -ENXIO;
3147		goto out;
3148	}
3149
3150	if (test_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags)) {
3151		dev_err(&cxlr->dev,
3152			"failed to activate, re-commit region and retry\n");
3153		rc = -ENXIO;
3154		goto out;
3155	}
3156
3157	/*
3158	 * From this point on any path that changes the region's state away from
3159	 * CXL_CONFIG_COMMIT is also responsible for releasing the driver.
3160	 */
3161out:
3162	up_read(&cxl_region_rwsem);
3163
3164	if (rc)
3165		return rc;
3166
3167	switch (cxlr->mode) {
3168	case CXL_DECODER_PMEM:
3169		return devm_cxl_add_pmem_region(cxlr);
3170	case CXL_DECODER_RAM:
3171		/*
3172		 * The region cannot be managed by CXL if any portion of
3173		 * it is already online as 'System RAM'.
3174		 */
3175		if (walk_iomem_res_desc(IORES_DESC_NONE,
3176					IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
3177					p->res->start, p->res->end, cxlr,
3178					is_system_ram) > 0)
3179			return 0;
3180		return devm_cxl_add_dax_region(cxlr);
3181	default:
3182		dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
3183			cxlr->mode);
3184		return -ENXIO;
3185	}
3186}
3187
3188static struct cxl_driver cxl_region_driver = {
3189	.name = "cxl_region",
3190	.probe = cxl_region_probe,
3191	.id = CXL_DEVICE_REGION,
3192};
3193
3194int cxl_region_init(void)
3195{
3196	return cxl_driver_register(&cxl_region_driver);
3197}
3198
3199void cxl_region_exit(void)
3200{
3201	cxl_driver_unregister(&cxl_region_driver);
3202}
3203
3204MODULE_IMPORT_NS(CXL);
3205MODULE_IMPORT_NS(DEVMEM);
3206MODULE_ALIAS_CXL(CXL_DEVICE_REGION);
3207