11539Srgrimes// SPDX-License-Identifier: GPL-2.0-only
21539Srgrimes/*
31539Srgrimes * Coherency fabric (Aurora) support for Armada 370, 375, 38x and XP
41539Srgrimes * platforms.
51539Srgrimes *
61539Srgrimes * Copyright (C) 2012 Marvell
71539Srgrimes *
81539Srgrimes * Yehuda Yitschak <yehuday@marvell.com>
91539Srgrimes * Gregory Clement <gregory.clement@free-electrons.com>
101539Srgrimes * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
111539Srgrimes *
121539Srgrimes * The Armada 370, 375, 38x and XP SOCs have a coherency fabric which is
131539Srgrimes * responsible for ensuring hardware coherency between all CPUs and between
141539Srgrimes * CPUs and I/O masters. This file initializes the coherency fabric and
151539Srgrimes * supplies basic routines for configuring and controlling hardware coherency
161539Srgrimes */
171539Srgrimes
181539Srgrimes#define pr_fmt(fmt) "mvebu-coherency: " fmt
191539Srgrimes
201539Srgrimes#include <linux/kernel.h>
211539Srgrimes#include <linux/init.h>
221539Srgrimes#include <linux/of_address.h>
231539Srgrimes#include <linux/io.h>
241539Srgrimes#include <linux/smp.h>
251539Srgrimes#include <linux/dma-map-ops.h>
261539Srgrimes#include <linux/platform_device.h>
271539Srgrimes#include <linux/slab.h>
281539Srgrimes#include <linux/mbus.h>
291539Srgrimes#include <linux/pci.h>
301539Srgrimes#include <asm/smp_plat.h>
311539Srgrimes#include <asm/cacheflush.h>
321539Srgrimes#include <asm/mach/map.h>
331539Srgrimes#include <asm/dma-mapping.h>
341539Srgrimes#include "coherency.h"
351539Srgrimes#include "mvebu-soc-id.h"
361539Srgrimes
/*
 * Physical start address of the coherency fabric's first register
 * resource; set in armada_370_coherency_init().
 */
unsigned long coherency_phys_base;
/* Mapping of the fabric node's register resource 0 on Armada 370/XP. */
void __iomem *coherency_base;
/*
 * Mapping of the fabric node's register resource 1 on Armada 370/XP,
 * or of resource 0 on Armada 375/38x.
 */
static void __iomem *coherency_cpu_base;
/*
 * Mapping of the "marvell,armada-xp-cpu-config" node's first register
 * resource; stays NULL when that node is absent or cannot be mapped.
 */
static void __iomem *cpu_config_base;
411539Srgrimes
421539Srgrimes/* Coherency fabric registers */
431539Srgrimes#define IO_SYNC_BARRIER_CTL_OFFSET		   0x0
441539Srgrimes
/*
 * Coherency fabric variants. These values are stored in the .data field
 * of of_coherency_table entries and returned by coherency_type();
 * COHERENCY_FABRIC_TYPE_NONE is returned when no usable fabric is found.
 */
enum {
	COHERENCY_FABRIC_TYPE_NONE,
	COHERENCY_FABRIC_TYPE_ARMADA_370_XP,
	COHERENCY_FABRIC_TYPE_ARMADA_375,
	COHERENCY_FABRIC_TYPE_ARMADA_380,
};
511539Srgrimes
/*
 * Device tree match table: maps each supported "compatible" string to
 * the corresponding COHERENCY_FABRIC_TYPE_* value, carried in .data.
 */
static const struct of_device_id of_coherency_table[] = {
	{.compatible = "marvell,coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_370_XP },
	{.compatible = "marvell,armada-375-coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_375 },
	{.compatible = "marvell,armada-380-coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_380 },
	{ /* end of list */ },
};
613041Swollman
621539Srgrimes/* Functions defined in coherency_ll.S */
631539Srgrimesint ll_enable_coherency(void);
641539Srgrimesvoid ll_add_cpu_to_smp_group(void);
651539Srgrimes
661539Srgrimes#define CPU_CONFIG_SHARED_L2 BIT(16)
671539Srgrimes
681539Srgrimes/*
691539Srgrimes * Disable the "Shared L2 Present" bit in CPU Configuration register
701539Srgrimes * on Armada XP.
713041Swollman *
723041Swollman * The "Shared L2 Present" bit affects the "level of coherence" value
733041Swollman * in the clidr CP15 register.  Cache operation functions such as
743041Swollman * "flush all" and "invalidate all" operate on all the cache levels
 * that are included in the defined level of coherence. When HW I/O
761539Srgrimes * coherency is used, this bit causes unnecessary flushes of the L2
77 * cache.
78 */
79static void armada_xp_clear_shared_l2(void)
80{
81	u32 reg;
82
83	if (!cpu_config_base)
84		return;
85
86	reg = readl(cpu_config_base);
87	reg &= ~CPU_CONFIG_SHARED_L2;
88	writel(reg, cpu_config_base);
89}
90
91static int mvebu_hwcc_notifier(struct notifier_block *nb,
92			       unsigned long event, void *__dev)
93{
94	struct device *dev = __dev;
95
96	if (event != BUS_NOTIFY_ADD_DEVICE)
97		return NOTIFY_DONE;
98	dev->dma_coherent = true;
99
100	return NOTIFY_OK;
101}
102
/* Notifier registered on the platform bus by coherency_late_init(). */
static struct notifier_block mvebu_hwcc_nb = {
	.notifier_call = mvebu_hwcc_notifier,
};

/*
 * Notifier registered on the PCI bus by coherency_pci_init(). That
 * function only exists when CONFIG_PCI is enabled, hence __maybe_unused.
 */
static struct notifier_block mvebu_hwcc_pci_nb __maybe_unused = {
	.notifier_call = mvebu_hwcc_notifier,
};
110
/*
 * CPU hotplug callback, installed for the CPUHP_AP_ARM_MVEBU_COHERENCY
 * state by armada_370_coherency_init(). Clears the "Shared L2 Present"
 * bit via armada_xp_clear_shared_l2(); @cpu is not used. Always
 * returns 0.
 */
static int armada_xp_clear_l2_starting(unsigned int cpu)
{
	armada_xp_clear_shared_l2();
	return 0;
}
116
117static void __init armada_370_coherency_init(struct device_node *np)
118{
119	struct resource res;
120	struct device_node *cpu_config_np;
121
122	of_address_to_resource(np, 0, &res);
123	coherency_phys_base = res.start;
124	/*
125	 * Ensure secondary CPUs will see the updated value,
126	 * which they read before they join the coherency
127	 * fabric, and therefore before they are coherent with
128	 * the boot CPU cache.
129	 */
130	sync_cache_w(&coherency_phys_base);
131	coherency_base = of_iomap(np, 0);
132	coherency_cpu_base = of_iomap(np, 1);
133
134	cpu_config_np = of_find_compatible_node(NULL, NULL,
135						"marvell,armada-xp-cpu-config");
136	if (!cpu_config_np)
137		goto exit;
138
139	cpu_config_base = of_iomap(cpu_config_np, 0);
140	if (!cpu_config_base) {
141		of_node_put(cpu_config_np);
142		goto exit;
143	}
144
145	of_node_put(cpu_config_np);
146
147	cpuhp_setup_state_nocalls(CPUHP_AP_ARM_MVEBU_COHERENCY,
148				  "arm/mvebu/coherency:starting",
149				  armada_xp_clear_l2_starting, NULL);
150exit:
151	set_cpu_coherent();
152}
153
154/*
155 * This ioremap hook is used on Armada 375/38x to ensure that all MMIO
156 * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is
157 * needed for the HW I/O coherency mechanism to work properly without
158 * deadlock.
159 */
160static void __iomem *
161armada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
162			 unsigned int mtype, void *caller)
163{
164	mtype = MT_UNCACHED;
165	return __arm_ioremap_caller(phys_addr, size, mtype, caller);
166}
167
168static void __init armada_375_380_coherency_init(struct device_node *np)
169{
170	struct device_node *cache_dn;
171
172	coherency_cpu_base = of_iomap(np, 0);
173	arch_ioremap_caller = armada_wa_ioremap_caller;
174	pci_ioremap_set_mem_type(MT_UNCACHED);
175
176	/*
177	 * We should switch the PL310 to I/O coherency mode only if
178	 * I/O coherency is actually enabled.
179	 */
180	if (!coherency_available())
181		return;
182
183	/*
184	 * Add the PL310 property "arm,io-coherent". This makes sure the
185	 * outer sync operation is not used, which allows to
186	 * workaround the system erratum that causes deadlocks when
187	 * doing PCIe in an SMP situation on Armada 375 and Armada
188	 * 38x.
189	 */
190	for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") {
191		struct property *p;
192
193		p = kzalloc(sizeof(*p), GFP_KERNEL);
194		p->name = kstrdup("arm,io-coherent", GFP_KERNEL);
195		of_add_property(cache_dn, p);
196	}
197}
198
199static int coherency_type(void)
200{
201	struct device_node *np;
202	const struct of_device_id *match;
203	int type;
204
205	/*
206	 * The coherency fabric is needed:
207	 * - For coherency between processors on Armada XP, so only
208	 *   when SMP is enabled.
209	 * - For coherency between the processor and I/O devices, but
210	 *   this coherency requires many pre-requisites (write
211	 *   allocate cache policy, shareable pages, SMP bit set) that
212	 *   are only meant in SMP situations.
213	 *
214	 * Note that this means that on Armada 370, there is currently
215	 * no way to use hardware I/O coherency, because even when
216	 * CONFIG_SMP is enabled, is_smp() returns false due to the
217	 * Armada 370 being a single-core processor. To lift this
218	 * limitation, we would have to find a way to make the cache
219	 * policy set to write-allocate (on all Armada SoCs), and to
220	 * set the shareable attribute in page tables (on all Armada
221	 * SoCs except the Armada 370). Unfortunately, such decisions
222	 * are taken very early in the kernel boot process, at a point
223	 * where we don't know yet on which SoC we are running.
224
225	 */
226	if (!is_smp())
227		return COHERENCY_FABRIC_TYPE_NONE;
228
229	np = of_find_matching_node_and_match(NULL, of_coherency_table, &match);
230	if (!np)
231		return COHERENCY_FABRIC_TYPE_NONE;
232
233	type = (int) match->data;
234
235	of_node_put(np);
236
237	return type;
238}
239
240int set_cpu_coherent(void)
241{
242	int type = coherency_type();
243
244	if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP) {
245		if (!coherency_base) {
246			pr_warn("Can't make current CPU cache coherent.\n");
247			pr_warn("Coherency fabric is not initialized\n");
248			return 1;
249		}
250
251		armada_xp_clear_shared_l2();
252		ll_add_cpu_to_smp_group();
253		return ll_enable_coherency();
254	}
255
256	return 0;
257}
258
/*
 * Report whether a coherency fabric is usable: returns non-zero when
 * coherency_type() yields anything other than
 * COHERENCY_FABRIC_TYPE_NONE, and 0 otherwise.
 */
int coherency_available(void)
{
	return coherency_type() != COHERENCY_FABRIC_TYPE_NONE;
}
263
264int __init coherency_init(void)
265{
266	int type = coherency_type();
267	struct device_node *np;
268
269	np = of_find_matching_node(NULL, of_coherency_table);
270
271	if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP)
272		armada_370_coherency_init(np);
273	else if (type == COHERENCY_FABRIC_TYPE_ARMADA_375 ||
274		 type == COHERENCY_FABRIC_TYPE_ARMADA_380)
275		armada_375_380_coherency_init(np);
276
277	of_node_put(np);
278
279	return 0;
280}
281
282static int __init coherency_late_init(void)
283{
284	if (coherency_available())
285		bus_register_notifier(&platform_bus_type,
286				      &mvebu_hwcc_nb);
287	return 0;
288}
289
290postcore_initcall(coherency_late_init);
291
292#if IS_ENABLED(CONFIG_PCI)
293static int __init coherency_pci_init(void)
294{
295	if (coherency_available())
296		bus_register_notifier(&pci_bus_type,
297				       &mvebu_hwcc_pci_nb);
298	return 0;
299}
300
301arch_initcall(coherency_pci_init);
302#endif
303