1// SPDX-License-Identifier: GPL-2.0
2/*
3 * IOMMU API for ARM architected SMMUv3 implementations.
4 *
5 * Copyright (C) 2015 ARM Limited
6 *
7 * Author: Will Deacon <will.deacon@arm.com>
8 *
9 * This driver is powered by bad coffee and bombay mix.
10 */
11
12#include <linux/acpi.h>
13#include <linux/acpi_iort.h>
14#include <linux/bitops.h>
15#include <linux/crash_dump.h>
16#include <linux/delay.h>
17#include <linux/err.h>
18#include <linux/interrupt.h>
19#include <linux/io-pgtable.h>
20#include <linux/iopoll.h>
21#include <linux/module.h>
22#include <linux/msi.h>
23#include <linux/of.h>
24#include <linux/of_address.h>
25#include <linux/of_platform.h>
26#include <linux/pci.h>
27#include <linux/pci-ats.h>
28#include <linux/platform_device.h>
29
30#include "arm-smmu-v3.h"
31#include "../../dma-iommu.h"
32
33static bool disable_bypass = true;
34module_param(disable_bypass, bool, 0444);
35MODULE_PARM_DESC(disable_bypass,
36	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
37
38static bool disable_msipolling;
39module_param(disable_msipolling, bool, 0444);
40MODULE_PARM_DESC(disable_msipolling,
41	"Disable MSI-based polling for CMD_SYNC completion.");
42
43enum arm_smmu_msi_index {
44	EVTQ_MSI_INDEX,
45	GERROR_MSI_INDEX,
46	PRIQ_MSI_INDEX,
47	ARM_SMMU_MAX_MSIS,
48};
49
50static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu,
51				      ioasid_t sid);
52
53static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
54	[EVTQ_MSI_INDEX] = {
55		ARM_SMMU_EVTQ_IRQ_CFG0,
56		ARM_SMMU_EVTQ_IRQ_CFG1,
57		ARM_SMMU_EVTQ_IRQ_CFG2,
58	},
59	[GERROR_MSI_INDEX] = {
60		ARM_SMMU_GERROR_IRQ_CFG0,
61		ARM_SMMU_GERROR_IRQ_CFG1,
62		ARM_SMMU_GERROR_IRQ_CFG2,
63	},
64	[PRIQ_MSI_INDEX] = {
65		ARM_SMMU_PRIQ_IRQ_CFG0,
66		ARM_SMMU_PRIQ_IRQ_CFG1,
67		ARM_SMMU_PRIQ_IRQ_CFG2,
68	},
69};
70
71struct arm_smmu_option_prop {
72	u32 opt;
73	const char *prop;
74};
75
76DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
77DEFINE_MUTEX(arm_smmu_asid_lock);
78
79/*
80 * Special value used by SVA when a process dies, to quiesce a CD without
81 * disabling it.
82 */
83struct arm_smmu_ctx_desc quiet_cd = { 0 };
84
85static struct arm_smmu_option_prop arm_smmu_options[] = {
86	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
87	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
88	{ 0, NULL},
89};
90
91static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
92				    struct arm_smmu_device *smmu);
93
94static void parse_driver_options(struct arm_smmu_device *smmu)
95{
96	int i = 0;
97
98	do {
99		if (of_property_read_bool(smmu->dev->of_node,
100						arm_smmu_options[i].prop)) {
101			smmu->options |= arm_smmu_options[i].opt;
102			dev_notice(smmu->dev, "option %s\n",
103				arm_smmu_options[i].prop);
104		}
105	} while (arm_smmu_options[++i].opt);
106}
107
108/* Low-level queue manipulation functions */
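/*
 * A note on the pointer encoding used by the helpers below (illustrative;
 * see the Q_IDX/Q_WRP/Q_OVF macros): the low max_n_shift bits of prod/cons
 * hold the ring index, the next bit is a wrap flag that toggles on each pass
 * around the ring, and a further bit (Q_OVF()) carries the overflow flag.
 * As a toy example with max_n_shift == 2 (4 entries), prod == 0x5 and
 * cons == 0x1 both index slot 1 but differ in the wrap bit, so queue_full()
 * is true and queue_has_space() reports no free slots.
 */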
109static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
110{
111	u32 space, prod, cons;
112
113	prod = Q_IDX(q, q->prod);
114	cons = Q_IDX(q, q->cons);
115
116	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
117		space = (1 << q->max_n_shift) - (prod - cons);
118	else
119		space = cons - prod;
120
121	return space >= n;
122}
123
124static bool queue_full(struct arm_smmu_ll_queue *q)
125{
126	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
127	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
128}
129
130static bool queue_empty(struct arm_smmu_ll_queue *q)
131{
132	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
133	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
134}
135
136static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
137{
138	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
139		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
140	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
141		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
142}
143
144static void queue_sync_cons_out(struct arm_smmu_queue *q)
145{
146	/*
147	 * Ensure that all CPU accesses (reads and writes) to the queue
148	 * are complete before we update the cons pointer.
149	 */
150	__iomb();
151	writel_relaxed(q->llq.cons, q->cons_reg);
152}
153
154static void queue_inc_cons(struct arm_smmu_ll_queue *q)
155{
156	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
157	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
158}
159
160static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
161{
162	struct arm_smmu_ll_queue *llq = &q->llq;
163
164	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
165		return;
166
167	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
168		      Q_IDX(llq, llq->cons);
169	queue_sync_cons_out(q);
170}
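
/*
 * For context: on the event and PRI queues the SMMU (the producer) toggles
 * the PROD overflow flag when it has had to discard entries because the
 * queue was full. Copying Q_OVF(prod) into the shadow cons above and writing
 * it back acts as the software acknowledgement, so a later toggle can be
 * detected as a new overflow by queue_sync_prod_in().
 */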
171
172static int queue_sync_prod_in(struct arm_smmu_queue *q)
173{
174	u32 prod;
175	int ret = 0;
176
177	/*
178	 * We can't use the _relaxed() variant here, as we must prevent
179	 * speculative reads of the queue before we have determined that
180	 * prod has indeed moved.
181	 */
182	prod = readl(q->prod_reg);
183
184	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
185		ret = -EOVERFLOW;
186
187	q->llq.prod = prod;
188	return ret;
189}
190
191static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
192{
193	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
194	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
195}
196
197static void queue_poll_init(struct arm_smmu_device *smmu,
198			    struct arm_smmu_queue_poll *qp)
199{
200	qp->delay = 1;
201	qp->spin_cnt = 0;
202	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
203	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
204}
205
206static int queue_poll(struct arm_smmu_queue_poll *qp)
207{
208	if (ktime_compare(ktime_get(), qp->timeout) > 0)
209		return -ETIMEDOUT;
210
211	if (qp->wfe) {
212		wfe();
213	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
214		cpu_relax();
215	} else {
216		udelay(qp->delay);
217		qp->delay *= 2;
218		qp->spin_cnt = 0;
219	}
220
221	return 0;
222}
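
/*
 * In other words, each poll attempt either waits for an event (WFE, when the
 * SEV feature is present), spins briefly with cpu_relax(), or busy-waits
 * with an exponentially growing udelay (1us, 2us, 4us, ...) once the spin
 * budget of ARM_SMMU_POLL_SPIN_COUNT iterations is exhausted, until the
 * ARM_SMMU_POLL_TIMEOUT_US deadline set in queue_poll_init() expires.
 */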
223
224static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
225{
226	int i;
227
228	for (i = 0; i < n_dwords; ++i)
229		*dst++ = cpu_to_le64(*src++);
230}
231
232static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
233{
234	int i;
235
236	for (i = 0; i < n_dwords; ++i)
237		*dst++ = le64_to_cpu(*src++);
238}
239
240static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
241{
242	if (queue_empty(&q->llq))
243		return -EAGAIN;
244
245	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
246	queue_inc_cons(&q->llq);
247	queue_sync_cons_out(q);
248	return 0;
249}
250
251/* High-level queue accessors */
252static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
253{
254	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
255	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
256
257	switch (ent->opcode) {
258	case CMDQ_OP_TLBI_EL2_ALL:
259	case CMDQ_OP_TLBI_NSNH_ALL:
260		break;
261	case CMDQ_OP_PREFETCH_CFG:
262		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
263		break;
264	case CMDQ_OP_CFGI_CD:
265		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
266		fallthrough;
267	case CMDQ_OP_CFGI_STE:
268		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
269		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
270		break;
271	case CMDQ_OP_CFGI_CD_ALL:
272		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
273		break;
274	case CMDQ_OP_CFGI_ALL:
275		/* Cover the entire SID range */
276		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
277		break;
278	case CMDQ_OP_TLBI_NH_VA:
279		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
280		fallthrough;
281	case CMDQ_OP_TLBI_EL2_VA:
282		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
283		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
284		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
285		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
286		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
287		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
288		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
289		break;
290	case CMDQ_OP_TLBI_S2_IPA:
291		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
292		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
293		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
294		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
295		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
296		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
297		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
298		break;
299	case CMDQ_OP_TLBI_NH_ASID:
300		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
301		fallthrough;
302	case CMDQ_OP_TLBI_S12_VMALL:
303		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
304		break;
305	case CMDQ_OP_TLBI_EL2_ASID:
306		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
307		break;
308	case CMDQ_OP_ATC_INV:
309		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
310		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
311		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
312		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
313		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
314		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
315		break;
316	case CMDQ_OP_PRI_RESP:
317		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
318		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
319		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
320		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
321		switch (ent->pri.resp) {
322		case PRI_RESP_DENY:
323		case PRI_RESP_FAIL:
324		case PRI_RESP_SUCC:
325			break;
326		default:
327			return -EINVAL;
328		}
329		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
330		break;
331	case CMDQ_OP_RESUME:
332		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
333		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
334		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
335		break;
336	case CMDQ_OP_CMD_SYNC:
337		if (ent->sync.msiaddr) {
338			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
339			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
340		} else {
341			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
342		}
343		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
344		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
345		break;
346	default:
347		return -ENOENT;
348	}
349
350	return 0;
351}
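
/*
 * Usage sketch (illustrative only): callers typically fill in a
 * struct arm_smmu_cmdq_ent and let the issue helpers below encode it, e.g.
 * to invalidate a single stage-1 VA for an ASID:
 *
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode    = CMDQ_OP_TLBI_NH_VA,
 *		.tlbi = {
 *			.asid = asid,
 *			.addr = iova,
 *			.leaf = true,
 *		},
 *	};
 *
 *	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &ent);
 *
 * arm_smmu_cmdq_build_cmd() returns -ENOENT for opcodes it does not know how
 * to encode, so unknown commands are rejected before being queued.
 */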
352
353static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
354{
355	return &smmu->cmdq;
356}
357
358static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
359					 struct arm_smmu_queue *q, u32 prod)
360{
361	struct arm_smmu_cmdq_ent ent = {
362		.opcode = CMDQ_OP_CMD_SYNC,
363	};
364
365	/*
366	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
367	 * payload, so the write will zero the entire command on that platform.
368	 */
369	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
370		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
371				   q->ent_dwords * 8;
372	}
373
374	arm_smmu_cmdq_build_cmd(cmd, &ent);
375}
376
377static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
378				     struct arm_smmu_queue *q)
379{
380	static const char * const cerror_str[] = {
381		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
382		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
383		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
384		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
385	};
386
387	int i;
388	u64 cmd[CMDQ_ENT_DWORDS];
389	u32 cons = readl_relaxed(q->cons_reg);
390	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
391	struct arm_smmu_cmdq_ent cmd_sync = {
392		.opcode = CMDQ_OP_CMD_SYNC,
393	};
394
395	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
396		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
397
398	switch (idx) {
399	case CMDQ_ERR_CERROR_ABT_IDX:
400		dev_err(smmu->dev, "retrying command fetch\n");
401		return;
402	case CMDQ_ERR_CERROR_NONE_IDX:
403		return;
404	case CMDQ_ERR_CERROR_ATC_INV_IDX:
405		/*
406		 * ATC Invalidation Completion timeout. CONS is still pointing
407		 * at the CMD_SYNC. Attempt to complete other pending commands
408		 * by repeating the CMD_SYNC, though we might well end up back
409		 * here since the ATC invalidation may still be pending.
410		 */
411		return;
412	case CMDQ_ERR_CERROR_ILL_IDX:
413	default:
414		break;
415	}
416
417	/*
418	 * We may have concurrent producers, so we need to be careful
419	 * not to touch any of the shadow cmdq state.
420	 */
421	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
422	dev_err(smmu->dev, "skipping command in error state:\n");
423	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
424		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
425
426	/* Convert the erroneous command into a CMD_SYNC */
427	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
428
429	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
430}
431
432static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
433{
434	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
435}
436
437/*
438 * Command queue locking.
439 * This is a form of bastardised rwlock with the following major changes:
440 *
441 * - The only LOCK routines are exclusive_trylock() and shared_lock().
442 *   Neither have barrier semantics, and instead provide only a control
443 *   dependency.
444 *
445 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
446 *   fails if the caller appears to be the last lock holder (yes, this is
447 *   racy). All successful UNLOCK routines have RELEASE semantics.
448 */
449static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
450{
451	int val;
452
453	/*
454	 * We can try to avoid the cmpxchg() loop by simply incrementing the
455	 * lock counter. When held in exclusive state, the lock counter is set
456	 * to INT_MIN so these increments won't hurt as the value will remain
457	 * negative.
458	 */
459	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
460		return;
461
462	do {
463		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
464	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
465}
466
467static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
468{
469	(void)atomic_dec_return_release(&cmdq->lock);
470}
471
472static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
473{
474	if (atomic_read(&cmdq->lock) == 1)
475		return false;
476
477	arm_smmu_cmdq_shared_unlock(cmdq);
478	return true;
479}
480
481#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
482({									\
483	bool __ret;							\
484	local_irq_save(flags);						\
485	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
486	if (!__ret)							\
487		local_irq_restore(flags);				\
488	__ret;								\
489})
490
491#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
492({									\
493	atomic_set_release(&cmdq->lock, 0);				\
494	local_irq_restore(flags);					\
495})
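
/*
 * To summarise the encoding used above: cmdq->lock is 0 when free, a
 * positive count of readers when held shared, and INT_MIN (plus any
 * optimistic reader increments) when held exclusively, which is why
 * shared_lock() only falls back to the cmpxchg() loop after observing a
 * negative value.
 */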
496
497
498/*
499 * Command queue insertion.
500 * This is made fiddly by our attempts to achieve some sort of scalability
501 * since there is one queue shared amongst all of the CPUs in the system.  If
502 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
503 * then you'll *love* this monstrosity.
504 *
505 * The basic idea is to split the queue up into ranges of commands that are
506 * owned by a given CPU; the owner may not have written all of the commands
507 * itself, but is responsible for advancing the hardware prod pointer when
508 * the time comes. The algorithm is roughly:
509 *
510 * 	1. Allocate some space in the queue. At this point we also discover
511 *	   whether the head of the queue is currently owned by another CPU,
512 *	   or whether we are the owner.
513 *
514 *	2. Write our commands into our allocated slots in the queue.
515 *
516 *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
517 *
518 *	4. If we are an owner:
519 *		a. Wait for the previous owner to finish.
520 *		b. Mark the queue head as unowned, which tells us the range
521 *		   that we are responsible for publishing.
522 *		c. Wait for all commands in our owned range to become valid.
523 *		d. Advance the hardware prod pointer.
524 *		e. Tell the next owner we've finished.
525 *
526 *	5. If we are inserting a CMD_SYNC (we may or may not have been an
527 *	   owner), then we need to stick around until it has completed:
528 *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
529 *		   to clear the first 4 bytes.
530 *		b. Otherwise, we spin waiting for the hardware cons pointer to
531 *		   advance past our command.
532 *
533 * The devil is in the details, particularly the use of locking for handling
534 * SYNC completion and freeing up space in the queue before we think that it is
535 * full.
536 */
537static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
538					       u32 sprod, u32 eprod, bool set)
539{
540	u32 swidx, sbidx, ewidx, ebidx;
541	struct arm_smmu_ll_queue llq = {
542		.max_n_shift	= cmdq->q.llq.max_n_shift,
543		.prod		= sprod,
544	};
545
546	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
547	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
548
549	while (llq.prod != eprod) {
550		unsigned long mask;
551		atomic_long_t *ptr;
552		u32 limit = BITS_PER_LONG;
553
554		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
555		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
556
557		ptr = &cmdq->valid_map[swidx];
558
559		if ((swidx == ewidx) && (sbidx < ebidx))
560			limit = ebidx;
561
562		mask = GENMASK(limit - 1, sbidx);
563
564		/*
565		 * The valid bit is the inverse of the wrap bit. This means
566		 * that a zero-initialised queue is invalid and, after marking
567		 * all entries as valid, they become invalid again when we
568		 * wrap.
569		 */
570		if (set) {
571			atomic_long_xor(mask, ptr);
572		} else { /* Poll */
573			unsigned long valid;
574
575			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
576			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
577		}
578
579		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
580	}
581}
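
/*
 * A worked example of the valid_map trick above: on the first pass around
 * the queue (wrap bit clear) producers XOR their slots' bits from 0 to 1, so
 * "valid" means 1; after the queue wraps, the same XOR flips those bits back
 * to 0, so "valid" now means 0. That is why the polling path computes the
 * expected pattern as (ULONG_MAX + !!Q_WRP(...)) & mask, i.e. all-ones when
 * the wrap bit is clear and all-zeroes when it is set.
 */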
582
583/* Mark all entries in the range [sprod, eprod) as valid */
584static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
585					u32 sprod, u32 eprod)
586{
587	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
588}
589
590/* Wait for all entries in the range [sprod, eprod) to become valid */
591static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
592					 u32 sprod, u32 eprod)
593{
594	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
595}
596
597/* Wait for the command queue to become non-full */
598static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
599					     struct arm_smmu_ll_queue *llq)
600{
601	unsigned long flags;
602	struct arm_smmu_queue_poll qp;
603	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
604	int ret = 0;
605
606	/*
607	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
608	 * that fails, spin until somebody else updates it for us.
609	 */
610	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
611		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
612		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
613		llq->val = READ_ONCE(cmdq->q.llq.val);
614		return 0;
615	}
616
617	queue_poll_init(smmu, &qp);
618	do {
619		llq->val = READ_ONCE(cmdq->q.llq.val);
620		if (!queue_full(llq))
621			break;
622
623		ret = queue_poll(&qp);
624	} while (!ret);
625
626	return ret;
627}
628
629/*
630 * Wait until the SMMU signals a CMD_SYNC completion MSI.
631 * Must be called with the cmdq lock held in some capacity.
632 */
633static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
634					  struct arm_smmu_ll_queue *llq)
635{
636	int ret = 0;
637	struct arm_smmu_queue_poll qp;
638	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
639	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
640
641	queue_poll_init(smmu, &qp);
642
643	/*
644	 * The MSI won't generate an event, since it's being written back
645	 * into the command queue.
646	 */
647	qp.wfe = false;
648	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
649	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
650	return ret;
651}
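
/*
 * For context: arm_smmu_cmdq_build_sync_cmd() points the CMD_SYNC's MSIADDR
 * back at the command's own slot in the queue, and the MSI data field is
 * left as zero when the command is built, so completion is observed here as
 * dword 0 of the CMD_SYNC being cleared to 0 rather than as an interrupt.
 */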
652
653/*
654 * Wait until the SMMU cons index passes llq->prod.
655 * Must be called with the cmdq lock held in some capacity.
656 */
657static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
658					       struct arm_smmu_ll_queue *llq)
659{
660	struct arm_smmu_queue_poll qp;
661	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
662	u32 prod = llq->prod;
663	int ret = 0;
664
665	queue_poll_init(smmu, &qp);
666	llq->val = READ_ONCE(cmdq->q.llq.val);
667	do {
668		if (queue_consumed(llq, prod))
669			break;
670
671		ret = queue_poll(&qp);
672
673		/*
674		 * This needs to be a readl() so that our subsequent call
675		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
676		 *
677		 * Specifically, we need to ensure that we observe all
678		 * shared_lock()s by other CMD_SYNCs that share our owner,
679		 * so that a failing call to tryunlock() means that we're
680		 * the last one out and therefore we can safely advance
681		 * cmdq->q.llq.cons. Roughly speaking:
682		 *
683		 * CPU 0		CPU1			CPU2 (us)
684		 *
685		 * if (sync)
686		 * 	shared_lock();
687		 *
688		 * dma_wmb();
689		 * set_valid_map();
690		 *
691		 * 			if (owner) {
692		 *				poll_valid_map();
693		 *				<control dependency>
694		 *				writel(prod_reg);
695		 *
696		 *						readl(cons_reg);
697		 *						tryunlock();
698		 *
699		 * Requires us to see CPU 0's shared_lock() acquisition.
700		 */
701		llq->cons = readl(cmdq->q.cons_reg);
702	} while (!ret);
703
704	return ret;
705}
706
707static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
708					 struct arm_smmu_ll_queue *llq)
709{
710	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
711		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
712
713	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
714}
715
716static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
717					u32 prod, int n)
718{
719	int i;
720	struct arm_smmu_ll_queue llq = {
721		.max_n_shift	= cmdq->q.llq.max_n_shift,
722		.prod		= prod,
723	};
724
725	for (i = 0; i < n; ++i) {
726		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
727
728		prod = queue_inc_prod_n(&llq, i);
729		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
730	}
731}
732
733/*
734 * This is the actual insertion function, and provides the following
735 * ordering guarantees to callers:
736 *
737 * - There is a dma_wmb() before publishing any commands to the queue.
738 *   This can be relied upon to order prior writes to data structures
739 *   in memory (such as a CD or an STE) before the command.
740 *
741 * - On completion of a CMD_SYNC, there is a control dependency.
742 *   This can be relied upon to order subsequent writes to memory (e.g.
743 *   freeing an IOVA) after completion of the CMD_SYNC.
744 *
745 * - Command insertion is totally ordered, so if two CPUs each race to
746 *   insert their own list of commands then all of the commands from one
747 *   CPU will appear before any of the commands from the other CPU.
748 */
749static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
750				       u64 *cmds, int n, bool sync)
751{
752	u64 cmd_sync[CMDQ_ENT_DWORDS];
753	u32 prod;
754	unsigned long flags;
755	bool owner;
756	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
757	struct arm_smmu_ll_queue llq, head;
758	int ret = 0;
759
760	llq.max_n_shift = cmdq->q.llq.max_n_shift;
761
762	/* 1. Allocate some space in the queue */
763	local_irq_save(flags);
764	llq.val = READ_ONCE(cmdq->q.llq.val);
765	do {
766		u64 old;
767
768		while (!queue_has_space(&llq, n + sync)) {
769			local_irq_restore(flags);
770			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
771				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
772			local_irq_save(flags);
773		}
774
775		head.cons = llq.cons;
776		head.prod = queue_inc_prod_n(&llq, n + sync) |
777					     CMDQ_PROD_OWNED_FLAG;
778
779		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
780		if (old == llq.val)
781			break;
782
783		llq.val = old;
784	} while (1);
785	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
786	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
787	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
788
789	/*
790	 * 2. Write our commands into the queue
791	 * Dependency ordering from the cmpxchg() loop above.
792	 */
793	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
794	if (sync) {
795		prod = queue_inc_prod_n(&llq, n);
796		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
797		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
798
799		/*
800		 * In order to determine completion of our CMD_SYNC, we must
801		 * ensure that the queue can't wrap twice without us noticing.
802		 * We achieve that by taking the cmdq lock as shared before
803		 * marking our slot as valid.
804		 */
805		arm_smmu_cmdq_shared_lock(cmdq);
806	}
807
808	/* 3. Mark our slots as valid, ensuring commands are visible first */
809	dma_wmb();
810	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
811
812	/* 4. If we are the owner, take control of the SMMU hardware */
813	if (owner) {
814		/* a. Wait for previous owner to finish */
815		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
816
817		/* b. Stop gathering work by clearing the owned flag */
818		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
819						   &cmdq->q.llq.atomic.prod);
820		prod &= ~CMDQ_PROD_OWNED_FLAG;
821
822		/*
823		 * c. Wait for any gathered work to be written to the queue.
824		 * Note that we read our own entries so that we have the control
825		 * dependency required by (d).
826		 */
827		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
828
829		/*
830		 * d. Advance the hardware prod pointer
831		 * Control dependency ordering from the entries becoming valid.
832		 */
833		writel_relaxed(prod, cmdq->q.prod_reg);
834
835		/*
836		 * e. Tell the next owner we're done
837		 * Make sure we've updated the hardware first, so that we don't
838		 * race to update prod and potentially move it backwards.
839		 */
840		atomic_set_release(&cmdq->owner_prod, prod);
841	}
842
843	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
844	if (sync) {
845		llq.prod = queue_inc_prod_n(&llq, n);
846		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
847		if (ret) {
848			dev_err_ratelimited(smmu->dev,
849					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
850					    llq.prod,
851					    readl_relaxed(cmdq->q.prod_reg),
852					    readl_relaxed(cmdq->q.cons_reg));
853		}
854
855		/*
856		 * Try to unlock the cmdq lock. This will fail if we're the last
857		 * reader, in which case we can safely update cmdq->q.llq.cons
858		 */
859		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
860			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
861			arm_smmu_cmdq_shared_unlock(cmdq);
862		}
863	}
864
865	local_irq_restore(flags);
866	return ret;
867}
868
869static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
870				     struct arm_smmu_cmdq_ent *ent,
871				     bool sync)
872{
873	u64 cmd[CMDQ_ENT_DWORDS];
874
875	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
876		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
877			 ent->opcode);
878		return -EINVAL;
879	}
880
881	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
882}
883
884static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
885				   struct arm_smmu_cmdq_ent *ent)
886{
887	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
888}
889
890static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
891					     struct arm_smmu_cmdq_ent *ent)
892{
893	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
894}
895
896static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
897				    struct arm_smmu_cmdq_batch *cmds,
898				    struct arm_smmu_cmdq_ent *cmd)
899{
900	int index;
901
902	if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
903	    (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
904		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
905		cmds->num = 0;
906	}
907
908	if (cmds->num == CMDQ_BATCH_ENTRIES) {
909		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
910		cmds->num = 0;
911	}
912
913	index = cmds->num * CMDQ_ENT_DWORDS;
914	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
915		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
916			 cmd->opcode);
917		return;
918	}
919
920	cmds->num++;
921}
922
923static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
924				      struct arm_smmu_cmdq_batch *cmds)
925{
926	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
927}
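
/*
 * Usage sketch (illustrative only): batching amortises the cost of the final
 * CMD_SYNC over many commands, as arm_smmu_sync_cd() below does:
 *
 *	struct arm_smmu_cmdq_batch cmds;
 *	struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_CFGI_CD, ... };
 *
 *	cmds.num = 0;
 *	for (each stream)
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 *
 * The batch is flushed early, without a sync, if it fills up before submit
 * time.
 */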
928
929static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
930				   struct iommu_page_response *resp)
931{
932	struct arm_smmu_cmdq_ent cmd = {0};
933	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
934	int sid = master->streams[0].id;
935
936	if (WARN_ON(!master->stall_enabled))
937		return;
938
939	cmd.opcode		= CMDQ_OP_RESUME;
940	cmd.resume.sid		= sid;
941	cmd.resume.stag		= resp->grpid;
942	switch (resp->code) {
943	case IOMMU_PAGE_RESP_INVALID:
944	case IOMMU_PAGE_RESP_FAILURE:
945		cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
946		break;
947	case IOMMU_PAGE_RESP_SUCCESS:
948		cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
949		break;
950	default:
951		break;
952	}
953
954	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
955	/*
956	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
957	 * RESUME consumption guarantees that the stalled transaction will be
958	 * terminated... at some point in the future. PRI_RESP is fire and
959	 * forget.
960	 */
961}
962
963/* Context descriptor manipulation functions */
964void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
965{
966	struct arm_smmu_cmdq_ent cmd = {
967		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
968			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
969		.tlbi.asid = asid,
970	};
971
972	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
973}
974
/*
 * Based on the value of ent, report which bits of the STE the HW will access.
 * It would be nice if this were complete according to the spec, but minimally
 * it has to capture the bits this driver uses.
 */
980static void arm_smmu_get_ste_used(const struct arm_smmu_ste *ent,
981				  struct arm_smmu_ste *used_bits)
982{
983	unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent->data[0]));
984
985	used_bits->data[0] = cpu_to_le64(STRTAB_STE_0_V);
986	if (!(ent->data[0] & cpu_to_le64(STRTAB_STE_0_V)))
987		return;
988
989	used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
990
991	/* S1 translates */
992	if (cfg & BIT(0)) {
993		used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
994						  STRTAB_STE_0_S1CTXPTR_MASK |
995						  STRTAB_STE_0_S1CDMAX);
996		used_bits->data[1] |=
997			cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
998				    STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
999				    STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
1000				    STRTAB_STE_1_EATS);
1001		used_bits->data[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
1002	}
1003
1004	/* S2 translates */
1005	if (cfg & BIT(1)) {
1006		used_bits->data[1] |=
1007			cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG);
1008		used_bits->data[2] |=
1009			cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
1010				    STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
1011				    STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R);
1012		used_bits->data[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
1013	}
1014
1015	if (cfg == STRTAB_STE_0_CFG_BYPASS)
1016		used_bits->data[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
1017}
1018
/*
 * Figure out if we can do a hitless update of entry to become target. Returns
 * a bit mask where a 1 indicates that the corresponding qword needs to be set
 * disruptively. unused_update is an intermediate value of entry that has
 * unused bits set to their new values.
 */
1025static u8 arm_smmu_entry_qword_diff(const struct arm_smmu_ste *entry,
1026				    const struct arm_smmu_ste *target,
1027				    struct arm_smmu_ste *unused_update)
1028{
1029	struct arm_smmu_ste target_used = {};
1030	struct arm_smmu_ste cur_used = {};
1031	u8 used_qword_diff = 0;
1032	unsigned int i;
1033
1034	arm_smmu_get_ste_used(entry, &cur_used);
1035	arm_smmu_get_ste_used(target, &target_used);
1036
1037	for (i = 0; i != ARRAY_SIZE(target_used.data); i++) {
		/*
		 * Check that the masks are up to date; the make functions are
		 * not allowed to set a bit to 1 if the used function doesn't
		 * say it is used.
		 */
1043		WARN_ON_ONCE(target->data[i] & ~target_used.data[i]);
1044
1045		/* Bits can change because they are not currently being used */
1046		unused_update->data[i] = (entry->data[i] & cur_used.data[i]) |
1047					 (target->data[i] & ~cur_used.data[i]);
1048		/*
1049		 * Each bit indicates that a used bit in a qword needs to be
1050		 * changed after unused_update is applied.
1051		 */
1052		if ((unused_update->data[i] & target_used.data[i]) !=
1053		    target->data[i])
1054			used_qword_diff |= 1 << i;
1055	}
1056	return used_qword_diff;
1057}
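
/*
 * Example (illustrative): if the current and target STEs differ only in
 * qword 1, and only in bits the current configuration is not using, then
 * unused_update already matches the target and used_qword_diff is 0 (no
 * disruption needed). If a bit used by both configurations must change in
 * exactly one qword, used_qword_diff has a single bit set and the caller can
 * take the hitless path; two or more set bits force the V=0 sequence.
 */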
1058
1059static bool entry_set(struct arm_smmu_device *smmu, ioasid_t sid,
1060		      struct arm_smmu_ste *entry,
1061		      const struct arm_smmu_ste *target, unsigned int start,
1062		      unsigned int len)
1063{
1064	bool changed = false;
1065	unsigned int i;
1066
1067	for (i = start; len != 0; len--, i++) {
1068		if (entry->data[i] != target->data[i]) {
1069			WRITE_ONCE(entry->data[i], target->data[i]);
1070			changed = true;
1071		}
1072	}
1073
1074	if (changed)
1075		arm_smmu_sync_ste_for_sid(smmu, sid);
1076	return changed;
1077}
1078
/*
 * Update the STE/CD to the target configuration. The transition from the
 * current entry to the target entry takes place over multiple steps that
 * attempt to make the transition hitless if possible. This function takes
 * care not to create a situation where the HW can perceive a corrupted entry.
 * HW is only required to provide 64-bit atomicity for stores from the CPU,
 * while entries span multiple 64-bit values.
 *
 * The difference between the current value and the target value is analyzed
 * to determine which of three updates is required: disruptive, hitless or no
 * change.
 *
 * In the most general, disruptive case we can make any update in three steps:
 *  - Disrupt the entry (V=0)
 *  - Fill the now-unused qwords, except qword 0 which contains V
 *  - Make qword 0 hold the final value and become valid (V=1) with a single
 *    64-bit store
 *
 * However, this disrupts the HW while it is happening. There are several
 * interesting cases where an STE/CD can be updated without disturbing the HW
 * because only a small number of bits are changing (S1DSS, CONFIG, etc) or
 * because the used bits don't intersect. We can detect this by calculating
 * how many 64-bit values need updating after adjusting the unused bits and
 * skip the V=0 process. This relies on the IGNORED behavior described in the
 * specification.
 */
1105static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
1106			       struct arm_smmu_ste *entry,
1107			       const struct arm_smmu_ste *target)
1108{
1109	unsigned int num_entry_qwords = ARRAY_SIZE(target->data);
1110	struct arm_smmu_device *smmu = master->smmu;
1111	struct arm_smmu_ste unused_update;
1112	u8 used_qword_diff;
1113
1114	used_qword_diff =
1115		arm_smmu_entry_qword_diff(entry, target, &unused_update);
1116	if (hweight8(used_qword_diff) == 1) {
1117		/*
1118		 * Only one qword needs its used bits to be changed. This is a
1119		 * hitless update, update all bits the current STE is ignoring
1120		 * to their new values, then update a single "critical qword" to
1121		 * change the STE and finally 0 out any bits that are now unused
1122		 * in the target configuration.
1123		 */
1124		unsigned int critical_qword_index = ffs(used_qword_diff) - 1;
1125
		/*
		 * Skip writing unused bits in the critical qword since we'll
		 * be writing it in the next step anyway. This can save a sync
		 * when the only change is in that qword.
		 */
1131		unused_update.data[critical_qword_index] =
1132			entry->data[critical_qword_index];
1133		entry_set(smmu, sid, entry, &unused_update, 0, num_entry_qwords);
1134		entry_set(smmu, sid, entry, target, critical_qword_index, 1);
1135		entry_set(smmu, sid, entry, target, 0, num_entry_qwords);
1136	} else if (used_qword_diff) {
		/*
		 * At least two qwords need their in-use bits to be changed.
		 * This requires a breaking update: zero the V bit, write all
		 * qwords but 0, then set qword 0.
		 */
1142		unused_update.data[0] = entry->data[0] &
1143					cpu_to_le64(~STRTAB_STE_0_V);
1144		entry_set(smmu, sid, entry, &unused_update, 0, 1);
1145		entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1);
1146		entry_set(smmu, sid, entry, target, 0, 1);
1147	} else {
		/*
		 * No in-use bit changed. Sanity check that all unused bits are
		 * 0 in the entry. The target was already sanity checked by
		 * arm_smmu_entry_qword_diff().
		 */
1153		WARN_ON_ONCE(
1154			entry_set(smmu, sid, entry, target, 0, num_entry_qwords));
1155	}
1156
1157	/* It's likely that we'll want to use the new STE soon */
1158	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
1159		struct arm_smmu_cmdq_ent
1160			prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
1161					 .prefetch = {
1162						 .sid = sid,
1163					 } };
1164
1165		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1166	}
1167}
1168
1169static void arm_smmu_sync_cd(struct arm_smmu_master *master,
1170			     int ssid, bool leaf)
1171{
1172	size_t i;
1173	struct arm_smmu_cmdq_batch cmds;
1174	struct arm_smmu_device *smmu = master->smmu;
1175	struct arm_smmu_cmdq_ent cmd = {
1176		.opcode	= CMDQ_OP_CFGI_CD,
1177		.cfgi	= {
1178			.ssid	= ssid,
1179			.leaf	= leaf,
1180		},
1181	};
1182
1183	cmds.num = 0;
1184	for (i = 0; i < master->num_streams; i++) {
1185		cmd.cfgi.sid = master->streams[i].id;
1186		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1187	}
1188
1189	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1190}
1191
1192static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1193					struct arm_smmu_l1_ctx_desc *l1_desc)
1194{
1195	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1196
1197	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1198					     &l1_desc->l2ptr_dma, GFP_KERNEL);
1199	if (!l1_desc->l2ptr) {
1200		dev_warn(smmu->dev,
1201			 "failed to allocate context descriptor table\n");
1202		return -ENOMEM;
1203	}
1204	return 0;
1205}
1206
1207static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1208				      struct arm_smmu_l1_ctx_desc *l1_desc)
1209{
1210	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1211		  CTXDESC_L1_DESC_V;
1212
1213	/* See comment in arm_smmu_write_ctx_desc() */
1214	WRITE_ONCE(*dst, cpu_to_le64(val));
1215}
1216
1217static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
1218{
1219	__le64 *l1ptr;
1220	unsigned int idx;
1221	struct arm_smmu_l1_ctx_desc *l1_desc;
1222	struct arm_smmu_device *smmu = master->smmu;
1223	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1224
1225	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1226		return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS;
1227
1228	idx = ssid >> CTXDESC_SPLIT;
1229	l1_desc = &cd_table->l1_desc[idx];
1230	if (!l1_desc->l2ptr) {
1231		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1232			return NULL;
1233
1234		l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1235		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1236		/* An invalid L1CD can be cached */
1237		arm_smmu_sync_cd(master, ssid, false);
1238	}
1239	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1240	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1241}
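
/*
 * To illustrate the lookup above: with the two-level format, the SSID is
 * split so that the top bits (ssid >> CTXDESC_SPLIT) select an L1 descriptor
 * and the low bits select one of the CTXDESC_L2_ENTRIES CDs in the leaf
 * table that the descriptor points to; leaves are only allocated on first
 * use, which is what the arm_smmu_alloc_cd_leaf_table() call handles.
 */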
1242
1243int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
1244			    struct arm_smmu_ctx_desc *cd)
1245{
1246	/*
1247	 * This function handles the following cases:
1248	 *
1249	 * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
1250	 * (2) Install a secondary CD, for SID+SSID traffic.
1251	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1252	 *     CD, then invalidate the old entry and mappings.
1253	 * (4) Quiesce the context without clearing the valid bit. Disable
1254	 *     translation, and ignore any translation fault.
1255	 * (5) Remove a secondary CD.
1256	 */
1257	u64 val;
1258	bool cd_live;
1259	__le64 *cdptr;
1260	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1261	struct arm_smmu_device *smmu = master->smmu;
1262
1263	if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
1264		return -E2BIG;
1265
1266	cdptr = arm_smmu_get_cd_ptr(master, ssid);
1267	if (!cdptr)
1268		return -ENOMEM;
1269
1270	val = le64_to_cpu(cdptr[0]);
1271	cd_live = !!(val & CTXDESC_CD_0_V);
1272
1273	if (!cd) { /* (5) */
1274		val = 0;
1275	} else if (cd == &quiet_cd) { /* (4) */
1276		if (!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1277			val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R);
1278		val |= CTXDESC_CD_0_TCR_EPD0;
1279	} else if (cd_live) { /* (3) */
1280		val &= ~CTXDESC_CD_0_ASID;
1281		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1282		/*
1283		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1284		 * this substream's traffic
1285		 */
1286	} else { /* (1) and (2) */
1287		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1288		cdptr[2] = 0;
1289		cdptr[3] = cpu_to_le64(cd->mair);
1290
1291		/*
1292		 * STE may be live, and the SMMU might read dwords of this CD in any
1293		 * order. Ensure that it observes valid values before reading
1294		 * V=1.
1295		 */
1296		arm_smmu_sync_cd(master, ssid, true);
1297
1298		val = cd->tcr |
1299#ifdef __BIG_ENDIAN
1300			CTXDESC_CD_0_ENDI |
1301#endif
1302			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1303			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1304			CTXDESC_CD_0_AA64 |
1305			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1306			CTXDESC_CD_0_V;
1307
1308		if (cd_table->stall_enabled)
1309			val |= CTXDESC_CD_0_S;
1310	}
1311
1312	/*
1313	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1314	 * "Configuration structures and configuration invalidation completion"
1315	 *
1316	 *   The size of single-copy atomic reads made by the SMMU is
1317	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1318	 *   field within an aligned 64-bit span of a structure can be altered
1319	 *   without first making the structure invalid.
1320	 */
1321	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1322	arm_smmu_sync_cd(master, ssid, true);
1323	return 0;
1324}
1325
1326static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
1327{
1328	int ret;
1329	size_t l1size;
1330	size_t max_contexts;
1331	struct arm_smmu_device *smmu = master->smmu;
1332	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1333
1334	cd_table->stall_enabled = master->stall_enabled;
1335	cd_table->s1cdmax = master->ssid_bits;
1336	max_contexts = 1 << cd_table->s1cdmax;
1337
1338	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1339	    max_contexts <= CTXDESC_L2_ENTRIES) {
1340		cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1341		cd_table->num_l1_ents = max_contexts;
1342
1343		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1344	} else {
1345		cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1346		cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
1347						  CTXDESC_L2_ENTRIES);
1348
1349		cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
1350					      sizeof(*cd_table->l1_desc),
1351					      GFP_KERNEL);
1352		if (!cd_table->l1_desc)
1353			return -ENOMEM;
1354
1355		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1356	}
1357
1358	cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
1359					   GFP_KERNEL);
1360	if (!cd_table->cdtab) {
1361		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1362		ret = -ENOMEM;
1363		goto err_free_l1;
1364	}
1365
1366	return 0;
1367
1368err_free_l1:
1369	if (cd_table->l1_desc) {
1370		devm_kfree(smmu->dev, cd_table->l1_desc);
1371		cd_table->l1_desc = NULL;
1372	}
1373	return ret;
1374}
1375
1376static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
1377{
1378	int i;
1379	size_t size, l1size;
1380	struct arm_smmu_device *smmu = master->smmu;
1381	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1382
1383	if (cd_table->l1_desc) {
1384		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1385
1386		for (i = 0; i < cd_table->num_l1_ents; i++) {
1387			if (!cd_table->l1_desc[i].l2ptr)
1388				continue;
1389
1390			dmam_free_coherent(smmu->dev, size,
1391					   cd_table->l1_desc[i].l2ptr,
1392					   cd_table->l1_desc[i].l2ptr_dma);
1393		}
1394		devm_kfree(smmu->dev, cd_table->l1_desc);
1395		cd_table->l1_desc = NULL;
1396
1397		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1398	} else {
1399		l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1400	}
1401
1402	dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
1403	cd_table->cdtab_dma = 0;
1404	cd_table->cdtab = NULL;
1405}
1406
1407bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1408{
1409	bool free;
1410	struct arm_smmu_ctx_desc *old_cd;
1411
1412	if (!cd->asid)
1413		return false;
1414
1415	free = refcount_dec_and_test(&cd->refs);
1416	if (free) {
1417		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1418		WARN_ON(old_cd != cd);
1419	}
1420	return free;
1421}
1422
1423/* Stream table manipulation functions */
1424static void
1425arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1426{
1427	u64 val = 0;
1428
1429	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1430	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1431
1432	/* See comment in arm_smmu_write_ctx_desc() */
1433	WRITE_ONCE(*dst, cpu_to_le64(val));
1434}
1435
1436static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1437{
1438	struct arm_smmu_cmdq_ent cmd = {
1439		.opcode	= CMDQ_OP_CFGI_STE,
1440		.cfgi	= {
1441			.sid	= sid,
1442			.leaf	= true,
1443		},
1444	};
1445
1446	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1447}
1448
1449static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
1450{
1451	memset(target, 0, sizeof(*target));
1452	target->data[0] = cpu_to_le64(
1453		STRTAB_STE_0_V |
1454		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
1455}
1456
1457static void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
1458				     struct arm_smmu_ste *target)
1459{
1460	memset(target, 0, sizeof(*target));
1461	target->data[0] = cpu_to_le64(
1462		STRTAB_STE_0_V |
1463		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
1464
1465	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1466		target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1467							 STRTAB_STE_1_SHCFG_INCOMING));
1468}
1469
1470static void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
1471				      struct arm_smmu_master *master)
1472{
1473	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1474	struct arm_smmu_device *smmu = master->smmu;
1475
1476	memset(target, 0, sizeof(*target));
1477	target->data[0] = cpu_to_le64(
1478		STRTAB_STE_0_V |
1479		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1480		FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
1481		(cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1482		FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
1483
1484	target->data[1] = cpu_to_le64(
1485		FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1486		FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1487		FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1488		FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1489		((smmu->features & ARM_SMMU_FEAT_STALLS &&
1490		  !master->stall_enabled) ?
1491			 STRTAB_STE_1_S1STALLD :
1492			 0) |
1493		FIELD_PREP(STRTAB_STE_1_EATS,
1494			   master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1495
1496	if (smmu->features & ARM_SMMU_FEAT_E2H) {
1497		/*
1498		 * To support BTM the streamworld needs to match the
1499		 * configuration of the CPU so that the ASID broadcasts are
1500		 * properly matched. This means either S/NS-EL2-E2H (hypervisor)
1501		 * or NS-EL1 (guest). Since an SVA domain can be installed in a
1502		 * PASID this should always use a BTM compatible configuration
1503		 * if the HW supports it.
1504		 */
1505		target->data[1] |= cpu_to_le64(
1506			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
1507	} else {
1508		target->data[1] |= cpu_to_le64(
1509			FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1510
1511		/*
1512		 * VMID 0 is reserved for stage-2 bypass EL1 STEs, see
1513		 * arm_smmu_domain_alloc_id()
1514		 */
1515		target->data[2] =
1516			cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
1517	}
1518}
1519
1520static void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
1521					struct arm_smmu_master *master,
1522					struct arm_smmu_domain *smmu_domain)
1523{
1524	struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
1525	const struct io_pgtable_cfg *pgtbl_cfg =
1526		&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
1527	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
1528		&pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1529	u64 vtcr_val;
1530	struct arm_smmu_device *smmu = master->smmu;
1531
1532	memset(target, 0, sizeof(*target));
1533	target->data[0] = cpu_to_le64(
1534		STRTAB_STE_0_V |
1535		FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));
1536
1537	target->data[1] = cpu_to_le64(
1538		FIELD_PREP(STRTAB_STE_1_EATS,
1539			   master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
1540
1541	if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
1542		target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1543							  STRTAB_STE_1_SHCFG_INCOMING));
1544
1545	vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
1546		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
1547		   FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
1548		   FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
1549		   FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
1550		   FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
1551		   FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
1552	target->data[2] = cpu_to_le64(
1553		FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1554		FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
1555		STRTAB_STE_2_S2AA64 |
1556#ifdef __BIG_ENDIAN
1557		STRTAB_STE_2_S2ENDI |
1558#endif
1559		STRTAB_STE_2_S2PTW |
1560		STRTAB_STE_2_S2R);
1561
1562	target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
1563				      STRTAB_STE_3_S2TTB_MASK);
1564}
1565
/*
 * This can safely manipulate the STE memory directly, without a sync
 * sequence, because the STE table has not been installed in the SMMU yet.
 */
1570static void arm_smmu_init_initial_stes(struct arm_smmu_device *smmu,
1571				       struct arm_smmu_ste *strtab,
1572				       unsigned int nent)
1573{
1574	unsigned int i;
1575
1576	for (i = 0; i < nent; ++i) {
1577		if (disable_bypass)
1578			arm_smmu_make_abort_ste(strtab);
1579		else
1580			arm_smmu_make_bypass_ste(smmu, strtab);
1581		strtab++;
1582	}
1583}
1584
1585static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1586{
1587	size_t size;
1588	void *strtab;
1589	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1590	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1591
1592	if (desc->l2ptr)
1593		return 0;
1594
1595	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1596	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1597
1598	desc->span = STRTAB_SPLIT + 1;
1599	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1600					  GFP_KERNEL);
1601	if (!desc->l2ptr) {
1602		dev_err(smmu->dev,
1603			"failed to allocate l2 stream table for SID %u\n",
1604			sid);
1605		return -ENOMEM;
1606	}
1607
1608	arm_smmu_init_initial_stes(smmu, desc->l2ptr, 1 << STRTAB_SPLIT);
1609	arm_smmu_write_strtab_l1_desc(strtab, desc);
1610	return 0;
1611}
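
/*
 * Similarly to the CD tables, the two-level stream table splits the SID: the
 * top bits (sid >> STRTAB_SPLIT) select an L1 descriptor and the low
 * STRTAB_SPLIT bits select an STE within the leaf of 1 << STRTAB_SPLIT
 * entries allocated here, so leaves are only created for SID ranges that are
 * actually in use.
 */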
1612
1613static struct arm_smmu_master *
1614arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1615{
1616	struct rb_node *node;
1617	struct arm_smmu_stream *stream;
1618
1619	lockdep_assert_held(&smmu->streams_mutex);
1620
1621	node = smmu->streams.rb_node;
1622	while (node) {
1623		stream = rb_entry(node, struct arm_smmu_stream, node);
1624		if (stream->id < sid)
1625			node = node->rb_right;
1626		else if (stream->id > sid)
1627			node = node->rb_left;
1628		else
1629			return stream->master;
1630	}
1631
1632	return NULL;
1633}
1634
1635/* IRQ and event handlers */
1636static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1637{
1638	int ret = 0;
1639	u32 perm = 0;
1640	struct arm_smmu_master *master;
1641	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1642	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1643	struct iopf_fault fault_evt = { };
1644	struct iommu_fault *flt = &fault_evt.fault;
1645
1646	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1647	case EVT_ID_TRANSLATION_FAULT:
1648	case EVT_ID_ADDR_SIZE_FAULT:
1649	case EVT_ID_ACCESS_FAULT:
1650	case EVT_ID_PERMISSION_FAULT:
1651		break;
1652	default:
1653		return -EOPNOTSUPP;
1654	}
1655
1656	/* Stage-2 is always pinned at the moment */
1657	if (evt[1] & EVTQ_1_S2)
1658		return -EFAULT;
1659
1660	if (!(evt[1] & EVTQ_1_STALL))
1661		return -EOPNOTSUPP;
1662
1663	if (evt[1] & EVTQ_1_RnW)
1664		perm |= IOMMU_FAULT_PERM_READ;
1665	else
1666		perm |= IOMMU_FAULT_PERM_WRITE;
1667
1668	if (evt[1] & EVTQ_1_InD)
1669		perm |= IOMMU_FAULT_PERM_EXEC;
1670
1671	if (evt[1] & EVTQ_1_PnU)
1672		perm |= IOMMU_FAULT_PERM_PRIV;
1673
1674	flt->type = IOMMU_FAULT_PAGE_REQ;
1675	flt->prm = (struct iommu_fault_page_request) {
1676		.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1677		.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1678		.perm = perm,
1679		.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1680	};
1681
1682	if (ssid_valid) {
1683		flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1684		flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1685	}
1686
1687	mutex_lock(&smmu->streams_mutex);
1688	master = arm_smmu_find_master(smmu, sid);
1689	if (!master) {
1690		ret = -EINVAL;
1691		goto out_unlock;
1692	}
1693
1694	iommu_report_device_fault(master->dev, &fault_evt);
1695out_unlock:
1696	mutex_unlock(&smmu->streams_mutex);
1697	return ret;
1698}
1699
1700static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1701{
1702	int i, ret;
1703	struct arm_smmu_device *smmu = dev;
1704	struct arm_smmu_queue *q = &smmu->evtq.q;
1705	struct arm_smmu_ll_queue *llq = &q->llq;
1706	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1707				      DEFAULT_RATELIMIT_BURST);
1708	u64 evt[EVTQ_ENT_DWORDS];
1709
1710	do {
1711		while (!queue_remove_raw(q, evt)) {
1712			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1713
1714			ret = arm_smmu_handle_evt(smmu, evt);
1715			if (!ret || !__ratelimit(&rs))
1716				continue;
1717
1718			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1719			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1720				dev_info(smmu->dev, "\t0x%016llx\n",
1721					 (unsigned long long)evt[i]);
1722
1723			cond_resched();
1724		}
1725
1726		/*
1727		 * Not much we can do on overflow, so scream and pretend we're
1728		 * trying harder.
1729		 */
1730		if (queue_sync_prod_in(q) == -EOVERFLOW)
1731			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1732	} while (!queue_empty(llq));
1733
1734	/* Sync our overflow flag, as we believe we're up to speed */
1735	queue_sync_cons_ovf(q);
1736	return IRQ_HANDLED;
1737}
1738
1739static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1740{
1741	u32 sid, ssid;
1742	u16 grpid;
1743	bool ssv, last;
1744
1745	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1746	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1747	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1748	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1749	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1750
1751	dev_info(smmu->dev, "unexpected PRI request received:\n");
1752	dev_info(smmu->dev,
1753		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1754		 sid, ssid, grpid, last ? "L" : "",
1755		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1756		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1757		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1758		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1759		 evt[1] & PRIQ_1_ADDR_MASK);
1760
1761	if (last) {
1762		struct arm_smmu_cmdq_ent cmd = {
1763			.opcode			= CMDQ_OP_PRI_RESP,
1764			.substream_valid	= ssv,
1765			.pri			= {
1766				.sid	= sid,
1767				.ssid	= ssid,
1768				.grpid	= grpid,
1769				.resp	= PRI_RESP_DENY,
1770			},
1771		};
1772
1773		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1774	}
1775}
1776
1777static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1778{
1779	struct arm_smmu_device *smmu = dev;
1780	struct arm_smmu_queue *q = &smmu->priq.q;
1781	struct arm_smmu_ll_queue *llq = &q->llq;
1782	u64 evt[PRIQ_ENT_DWORDS];
1783
1784	do {
1785		while (!queue_remove_raw(q, evt))
1786			arm_smmu_handle_ppr(smmu, evt);
1787
1788		if (queue_sync_prod_in(q) == -EOVERFLOW)
1789			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1790	} while (!queue_empty(llq));
1791
1792	/* Sync our overflow flag, as we believe we're up to speed */
1793	queue_sync_cons_ovf(q);
1794	return IRQ_HANDLED;
1795}
1796
1797static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1798
1799static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1800{
1801	u32 gerror, gerrorn, active;
1802	struct arm_smmu_device *smmu = dev;
1803
1804	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1805	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1806
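	/*
	 * GERROR and GERRORN form a toggle pair: bits that differ between the
	 * two registers indicate error conditions that have not been
	 * acknowledged yet. Writing the current GERROR value back to GERRORN
	 * at the end of this handler acknowledges them.
	 */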
1807	active = gerror ^ gerrorn;
1808	if (!(active & GERROR_ERR_MASK))
1809		return IRQ_NONE; /* No errors pending */
1810
1811	dev_warn(smmu->dev,
1812		 "unexpected global error reported (0x%08x), this could be serious\n",
1813		 active);
1814
1815	if (active & GERROR_SFM_ERR) {
1816		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1817		arm_smmu_device_disable(smmu);
1818	}
1819
1820	if (active & GERROR_MSI_GERROR_ABT_ERR)
1821		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1822
1823	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1824		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1825
1826	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1827		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1828
1829	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1830		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1831
1832	if (active & GERROR_PRIQ_ABT_ERR)
1833		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1834
1835	if (active & GERROR_EVTQ_ABT_ERR)
1836		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1837
1838	if (active & GERROR_CMDQ_ERR)
1839		arm_smmu_cmdq_skip_err(smmu);
1840
1841	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1842	return IRQ_HANDLED;
1843}
1844
1845static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1846{
1847	struct arm_smmu_device *smmu = dev;
1848
1849	arm_smmu_evtq_thread(irq, dev);
1850	if (smmu->features & ARM_SMMU_FEAT_PRI)
1851		arm_smmu_priq_thread(irq, dev);
1852
1853	return IRQ_HANDLED;
1854}
1855
1856static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1857{
1858	arm_smmu_gerror_handler(irq, dev);
1859	return IRQ_WAKE_THREAD;
1860}
1861
1862static void
1863arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1864			struct arm_smmu_cmdq_ent *cmd)
1865{
1866	size_t log2_span;
1867	size_t span_mask;
1868	/* ATC invalidates are always on 4096-byte pages */
1869	size_t inval_grain_shift = 12;
1870	unsigned long page_start, page_end;
1871
1872	/*
1873	 * ATS and PASID:
1874	 *
1875	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1876	 * prefix. In that case all ATC entries within the address range are
1877	 * invalidated, including those that were requested with a PASID! There
1878	 * is no way to invalidate only entries without PASID.
1879	 *
1880	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1881	 * traffic), translation requests without PASID create ATC entries
1882	 * without PASID, which must be invalidated with substream_valid clear.
1883	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1884	 * ATC entries within the address range.
1885	 */
1886	*cmd = (struct arm_smmu_cmdq_ent) {
1887		.opcode			= CMDQ_OP_ATC_INV,
1888		.substream_valid	= (ssid != IOMMU_NO_PASID),
1889		.atc.ssid		= ssid,
1890	};
1891
1892	if (!size) {
1893		cmd->atc.size = ATC_INV_SIZE_ALL;
1894		return;
1895	}
1896
1897	page_start	= iova >> inval_grain_shift;
1898	page_end	= (iova + size - 1) >> inval_grain_shift;
1899
1900	/*
1901	 * In an ATS Invalidate Request, the address must be aligned to the
1902	 * range size, which must be a power-of-two number of pages. We
1903	 * thus have to choose between grossly over-invalidating the region, or
1904	 * splitting the invalidation into multiple commands. For simplicity
1905	 * we'll go with the first solution, but should refine it in the future
1906	 * if multiple commands are shown to be more efficient.
1907	 *
1908	 * Find the smallest power of two that covers the range. The most
1909	 * significant differing bit between the start and end addresses,
1910	 * fls(start ^ end), indicates the required span. For example:
1911	 *
1912	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1913	 *		x = 0b1000 ^ 0b1011 = 0b11
1914	 *		span = 1 << fls(x) = 4
1915	 *
1916	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1917	 *		x = 0b0111 ^ 0b1010 = 0b1101
1918	 *		span = 1 << fls(x) = 16
1919	 */
1920	log2_span	= fls_long(page_start ^ page_end);
1921	span_mask	= (1ULL << log2_span) - 1;
1922
1923	page_start	&= ~span_mask;
1924
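	/*
	 * Continuing the [7; 10] example above: log2_span = 4 and
	 * span_mask = 0xf, so page_start is rounded down to 0 and the command
	 * encodes addr = 0 with size = 4, i.e. 2^4 pages.
	 */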
1925	cmd->atc.addr	= page_start << inval_grain_shift;
1926	cmd->atc.size	= log2_span;
1927}
1928
1929static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1930{
1931	int i;
1932	struct arm_smmu_cmdq_ent cmd;
1933	struct arm_smmu_cmdq_batch cmds;
1934
1935	arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
1936
1937	cmds.num = 0;
1938	for (i = 0; i < master->num_streams; i++) {
1939		cmd.atc.sid = master->streams[i].id;
1940		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1941	}
1942
1943	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1944}
1945
1946int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1947			    unsigned long iova, size_t size)
1948{
1949	int i;
1950	unsigned long flags;
1951	struct arm_smmu_cmdq_ent cmd;
1952	struct arm_smmu_master *master;
1953	struct arm_smmu_cmdq_batch cmds;
1954
1955	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1956		return 0;
1957
1958	/*
1959	 * Ensure that we've completed prior invalidation of the main TLBs
1960	 * before we read 'nr_ats_masters' in case of a concurrent call to
1961	 * arm_smmu_enable_ats():
1962	 *
1963	 *	// unmap()			// arm_smmu_enable_ats()
1964	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1965	 *	smp_mb();			[...]
1966	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1967	 *
1968	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1969	 * ATS was enabled at the PCI device before completion of the TLBI.
1970	 */
1971	smp_mb();
1972	if (!atomic_read(&smmu_domain->nr_ats_masters))
1973		return 0;
1974
1975	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1976
1977	cmds.num = 0;
1978
1979	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1980	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1981		if (!master->ats_enabled)
1982			continue;
1983
1984		for (i = 0; i < master->num_streams; i++) {
1985			cmd.atc.sid = master->streams[i].id;
1986			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1987		}
1988	}
1989	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1990
1991	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1992}
1993
1994/* IO_PGTABLE API */
1995static void arm_smmu_tlb_inv_context(void *cookie)
1996{
1997	struct arm_smmu_domain *smmu_domain = cookie;
1998	struct arm_smmu_device *smmu = smmu_domain->smmu;
1999	struct arm_smmu_cmdq_ent cmd;
2000
2001	/*
2002	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2003	 * PTEs previously cleared by unmaps on the current CPU not yet visible
2004	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2005	 * insertion to guarantee those are observed before the TLBI. Do be
2006	 * careful, 007.
2007	 */
2008	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2009		arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
2010	} else {
2011		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
2012		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2013		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
2014	}
2015	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2016}
2017
2018static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
2019				     unsigned long iova, size_t size,
2020				     size_t granule,
2021				     struct arm_smmu_domain *smmu_domain)
2022{
2023	struct arm_smmu_device *smmu = smmu_domain->smmu;
2024	unsigned long end = iova + size, num_pages = 0, tg = 0;
2025	size_t inv_range = granule;
2026	struct arm_smmu_cmdq_batch cmds;
2027
2028	if (!size)
2029		return;
2030
2031	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2032		/* Get the leaf page size */
2033		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
2034
2035		num_pages = size >> tg;
2036
2037		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
2038		cmd->tlbi.tg = (tg - 10) / 2;
2039
2040		/*
2041		 * Determine what level the granule is at. For non-leaf, both
2042		 * io-pgtable and SVA pass a nominal last-level granule because
2043		 * they don't know what level(s) actually apply, so ignore that
2044		 * and leave TTL=0. However for various errata reasons we still
2045		 * want to use a range command, so avoid the SVA corner case
2046		 * where both scale and num could be 0 as well.
2047		 */
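		/*
		 * For example, with 4KiB leaf pages (tg = 12) a 2MiB leaf
		 * granule gives ttl = 4 - ((21 - 3) / (12 - 3)) = 2, i.e. a
		 * level-2 leaf, while a 4KiB granule gives ttl = 3.
		 */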
2048		if (cmd->tlbi.leaf)
2049			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
2050		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
2051			num_pages++;
2052	}
2053
2054	cmds.num = 0;
2055
2056	while (iova < end) {
2057		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2058			/*
2059			 * On each iteration of the loop, the range is 5 bits
2060			 * worth of the aligned size remaining.
2061			 * The range in pages is:
2062			 *
2063			 * range = (num_pages & (0x1f << __ffs(num_pages)))
2064			 */
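			/*
			 * Illustration, assuming tg = 12 (4KiB pages): for
			 * num_pages = 35 (0b100011), the first iteration sends
			 * a 3-page range command (scale = 0, num = 3) and the
			 * second a 32-page one (scale = 5, num = 1), covering
			 * all 35 pages in two commands.
			 */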
2065			unsigned long scale, num;
2066
2067			/* Determine the power of 2 multiple number of pages */
2068			scale = __ffs(num_pages);
2069			cmd->tlbi.scale = scale;
2070
2071			/* Determine how many chunks of 2^scale size we have */
2072			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
2073			cmd->tlbi.num = num - 1;
2074
2075			/* range is num * 2^scale * pgsize */
2076			inv_range = num << (scale + tg);
2077
2078			/* Clear out the lower order bits for the next iteration */
2079			num_pages -= num << scale;
2080		}
2081
2082		cmd->tlbi.addr = iova;
2083		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
2084		iova += inv_range;
2085	}
2086	arm_smmu_cmdq_batch_submit(smmu, &cmds);
2087}
2088
2089static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
2090					  size_t granule, bool leaf,
2091					  struct arm_smmu_domain *smmu_domain)
2092{
2093	struct arm_smmu_cmdq_ent cmd = {
2094		.tlbi = {
2095			.leaf	= leaf,
2096		},
2097	};
2098
2099	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2100		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2101				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
2102		cmd.tlbi.asid	= smmu_domain->cd.asid;
2103	} else {
2104		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
2105		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
2106	}
2107	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2108
2109	/*
2110	 * Unfortunately, this can't be leaf-only since we may have
2111	 * zapped an entire table.
2112	 */
2113	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size);
2114}
2115
2116void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
2117				 size_t granule, bool leaf,
2118				 struct arm_smmu_domain *smmu_domain)
2119{
2120	struct arm_smmu_cmdq_ent cmd = {
2121		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
2122			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
2123		.tlbi = {
2124			.asid	= asid,
2125			.leaf	= leaf,
2126		},
2127	};
2128
2129	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
2130}
2131
2132static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2133					 unsigned long iova, size_t granule,
2134					 void *cookie)
2135{
2136	struct arm_smmu_domain *smmu_domain = cookie;
2137	struct iommu_domain *domain = &smmu_domain->domain;
2138
2139	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2140}
2141
2142static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2143				  size_t granule, void *cookie)
2144{
2145	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2146}
2147
2148static const struct iommu_flush_ops arm_smmu_flush_ops = {
2149	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2150	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2151	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2152};
2153
2154/* IOMMU API */
2155static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2156{
2157	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2158
2159	switch (cap) {
2160	case IOMMU_CAP_CACHE_COHERENCY:
2161		/* Assume that a coherent TCU implies coherent TBUs */
2162		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2163	case IOMMU_CAP_NOEXEC:
2164	case IOMMU_CAP_DEFERRED_FLUSH:
2165		return true;
2166	default:
2167		return false;
2168	}
2169}
2170
2171static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2172{
2174	if (type == IOMMU_DOMAIN_SVA)
2175		return arm_smmu_sva_domain_alloc();
2176	return ERR_PTR(-EOPNOTSUPP);
2177}
2178
2179static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
2180{
2181	struct arm_smmu_domain *smmu_domain;
2182
2183	/*
2184	 * Allocate the domain and initialise some of its data structures.
2185	 * We can't really do anything meaningful until we've added a
2186	 * master.
2187	 */
2188	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2189	if (!smmu_domain)
2190		return ERR_PTR(-ENOMEM);
2191
2192	mutex_init(&smmu_domain->init_mutex);
2193	INIT_LIST_HEAD(&smmu_domain->devices);
2194	spin_lock_init(&smmu_domain->devices_lock);
2195	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2196
2197	if (dev) {
2198		struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2199		int ret;
2200
2201		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
2202		if (ret) {
2203			kfree(smmu_domain);
2204			return ERR_PTR(ret);
2205		}
2206	}
2207	return &smmu_domain->domain;
2208}
2209
2210static void arm_smmu_domain_free(struct iommu_domain *domain)
2211{
2212	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2213	struct arm_smmu_device *smmu = smmu_domain->smmu;
2214
2215	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2216
2217	/* Free the ASID or VMID */
2218	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2219		/* Prevent SVA from touching the CD while we're freeing it */
2220		mutex_lock(&arm_smmu_asid_lock);
2221		arm_smmu_free_asid(&smmu_domain->cd);
2222		mutex_unlock(&arm_smmu_asid_lock);
2223	} else {
2224		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2225		if (cfg->vmid)
2226			ida_free(&smmu->vmid_map, cfg->vmid);
2227	}
2228
2229	kfree(smmu_domain);
2230}
2231
2232static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
2233				       struct arm_smmu_domain *smmu_domain,
2234				       struct io_pgtable_cfg *pgtbl_cfg)
2235{
2236	int ret;
2237	u32 asid;
2238	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
2239	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2240
2241	refcount_set(&cd->refs, 1);
2242
2243	/* Prevent SVA from modifying the ASID until it is written to the CD */
2244	mutex_lock(&arm_smmu_asid_lock);
2245	ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd,
2246		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2247	if (ret)
2248		goto out_unlock;
2249
2250	cd->asid	= (u16)asid;
2251	cd->ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2252	cd->tcr		= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2253			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2254			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2255			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2256			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2257			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2258			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2259	cd->mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2260
2261	mutex_unlock(&arm_smmu_asid_lock);
2262	return 0;
2263
2264out_unlock:
2265	mutex_unlock(&arm_smmu_asid_lock);
2266	return ret;
2267}
2268
2269static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
2270				       struct arm_smmu_domain *smmu_domain,
2271				       struct io_pgtable_cfg *pgtbl_cfg)
2272{
2273	int vmid;
2274	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2275
2276	/* Reserve VMID 0 for stage-2 bypass STEs */
2277	vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2278			       GFP_KERNEL);
2279	if (vmid < 0)
2280		return vmid;
2281
2282	cfg->vmid	= (u16)vmid;
2283	return 0;
2284}
2285
2286static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
2287				    struct arm_smmu_device *smmu)
2288{
2289	int ret;
2290	unsigned long ias, oas;
2291	enum io_pgtable_fmt fmt;
2292	struct io_pgtable_cfg pgtbl_cfg;
2293	struct io_pgtable_ops *pgtbl_ops;
2294	int (*finalise_stage_fn)(struct arm_smmu_device *smmu,
2295				 struct arm_smmu_domain *smmu_domain,
2296				 struct io_pgtable_cfg *pgtbl_cfg);
2297
2298	/* Restrict the stage to what we can actually support */
2299	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2300		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2301	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2302		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2303
2304	switch (smmu_domain->stage) {
2305	case ARM_SMMU_DOMAIN_S1:
2306		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2307		ias = min_t(unsigned long, ias, VA_BITS);
2308		oas = smmu->ias;
2309		fmt = ARM_64_LPAE_S1;
2310		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2311		break;
2312	case ARM_SMMU_DOMAIN_S2:
2313		ias = smmu->ias;
2314		oas = smmu->oas;
2315		fmt = ARM_64_LPAE_S2;
2316		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2317		break;
2318	default:
2319		return -EINVAL;
2320	}
2321
2322	pgtbl_cfg = (struct io_pgtable_cfg) {
2323		.pgsize_bitmap	= smmu->pgsize_bitmap,
2324		.ias		= ias,
2325		.oas		= oas,
2326		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2327		.tlb		= &arm_smmu_flush_ops,
2328		.iommu_dev	= smmu->dev,
2329	};
2330
2331	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2332	if (!pgtbl_ops)
2333		return -ENOMEM;
2334
2335	smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2336	smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2337	smmu_domain->domain.geometry.force_aperture = true;
2338
2339	ret = finalise_stage_fn(smmu, smmu_domain, &pgtbl_cfg);
2340	if (ret < 0) {
2341		free_io_pgtable_ops(pgtbl_ops);
2342		return ret;
2343	}
2344
2345	smmu_domain->pgtbl_ops = pgtbl_ops;
2346	smmu_domain->smmu = smmu;
2347	return 0;
2348}
2349
2350static struct arm_smmu_ste *
2351arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2352{
2353	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2354
2355	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2356		unsigned int idx1, idx2;
2357
2358		/* Two-level walk */
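		/*
		 * The upper SID bits index the L1 descriptor array and the low
		 * STRTAB_SPLIT bits index the STE within that descriptor's L2
		 * table (e.g. with STRTAB_SPLIT == 8, SID 0x1234 maps to L1
		 * entry 0x12, L2 entry 0x34; illustrative values).
		 */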
2359		idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2360		idx2 = sid & ((1 << STRTAB_SPLIT) - 1);
2361		return &cfg->l1_desc[idx1].l2ptr[idx2];
2362	} else {
2363		/* Simple linear lookup */
2364		return (struct arm_smmu_ste *)
2365			&cfg->strtab[sid * STRTAB_STE_DWORDS];
2366	}
2367}
2368
2369static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
2370					 const struct arm_smmu_ste *target)
2371{
2372	int i, j;
2373	struct arm_smmu_device *smmu = master->smmu;
2374
2375	for (i = 0; i < master->num_streams; ++i) {
2376		u32 sid = master->streams[i].id;
2377		struct arm_smmu_ste *step =
2378			arm_smmu_get_step_for_sid(smmu, sid);
2379
2380		/* Bridged PCI devices may end up with duplicated IDs */
2381		for (j = 0; j < i; j++)
2382			if (master->streams[j].id == sid)
2383				break;
2384		if (j < i)
2385			continue;
2386
2387		arm_smmu_write_ste(master, sid, step, target);
2388	}
2389}
2390
2391static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2392{
2393	struct device *dev = master->dev;
2394	struct arm_smmu_device *smmu = master->smmu;
2395	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2396
2397	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2398		return false;
2399
2400	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2401		return false;
2402
2403	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2404}
2405
2406static void arm_smmu_enable_ats(struct arm_smmu_master *master,
2407				struct arm_smmu_domain *smmu_domain)
2408{
2409	size_t stu;
2410	struct pci_dev *pdev;
2411	struct arm_smmu_device *smmu = master->smmu;
2412
2413	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2414	if (!master->ats_enabled)
2415		return;
2416
2417	/* Smallest Translation Unit: log2 of the smallest supported granule */
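	/* e.g. a pgsize_bitmap whose smallest set bit is 4KiB gives stu = 12 */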
2418	stu = __ffs(smmu->pgsize_bitmap);
2419	pdev = to_pci_dev(master->dev);
2420
2421	atomic_inc(&smmu_domain->nr_ats_masters);
2422	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2423	if (pci_enable_ats(pdev, stu))
2424		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2425}
2426
2427static void arm_smmu_disable_ats(struct arm_smmu_master *master,
2428				 struct arm_smmu_domain *smmu_domain)
2429{
2430	if (!master->ats_enabled)
2431		return;
2432
2433	pci_disable_ats(to_pci_dev(master->dev));
2434	/*
2435	 * Ensure ATS is disabled at the endpoint before we issue the
2436	 * ATC invalidation via the SMMU.
2437	 */
2438	wmb();
2439	arm_smmu_atc_inv_master(master);
2440	atomic_dec(&smmu_domain->nr_ats_masters);
2441}
2442
2443static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2444{
2445	int ret;
2446	int features;
2447	int num_pasids;
2448	struct pci_dev *pdev;
2449
2450	if (!dev_is_pci(master->dev))
2451		return -ENODEV;
2452
2453	pdev = to_pci_dev(master->dev);
2454
2455	features = pci_pasid_features(pdev);
2456	if (features < 0)
2457		return features;
2458
2459	num_pasids = pci_max_pasids(pdev);
2460	if (num_pasids <= 0)
2461		return num_pasids;
2462
2463	ret = pci_enable_pasid(pdev, features);
2464	if (ret) {
2465		dev_err(&pdev->dev, "Failed to enable PASID\n");
2466		return ret;
2467	}
2468
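	/*
	 * pci_max_pasids() returns a count of PASIDs, so ilog2() converts it
	 * to a width in bits, clamped to the number of substream ID bits the
	 * SMMU supports.
	 */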
2469	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2470				  master->smmu->ssid_bits);
2471	return 0;
2472}
2473
2474static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2475{
2476	struct pci_dev *pdev;
2477
2478	if (!dev_is_pci(master->dev))
2479		return;
2480
2481	pdev = to_pci_dev(master->dev);
2482
2483	if (!pdev->pasid_enabled)
2484		return;
2485
2486	master->ssid_bits = 0;
2487	pci_disable_pasid(pdev);
2488}
2489
2490static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2491{
2492	struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev);
2493	struct arm_smmu_domain *smmu_domain;
2494	unsigned long flags;
2495
2496	if (!domain || !(domain->type & __IOMMU_DOMAIN_PAGING))
2497		return;
2498
2499	smmu_domain = to_smmu_domain(domain);
2500	arm_smmu_disable_ats(master, smmu_domain);
2501
2502	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2503	list_del_init(&master->domain_head);
2504	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2505
2506	master->ats_enabled = false;
2507}
2508
2509static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2510{
2511	int ret = 0;
2512	unsigned long flags;
2513	struct arm_smmu_ste target;
2514	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2515	struct arm_smmu_device *smmu;
2516	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2517	struct arm_smmu_master *master;
2518
2519	if (!fwspec)
2520		return -ENOENT;
2521
2522	master = dev_iommu_priv_get(dev);
2523	smmu = master->smmu;
2524
2525	/*
2526	 * Checking that SVA is disabled ensures that this device isn't bound to
2527	 * any mm, and can be safely detached from its old domain. Bonds cannot
2528	 * be removed concurrently since we're holding the group mutex.
2529	 */
2530	if (arm_smmu_master_sva_enabled(master)) {
2531		dev_err(dev, "cannot attach - SVA enabled\n");
2532		return -EBUSY;
2533	}
2534
2535	mutex_lock(&smmu_domain->init_mutex);
2536
2537	if (!smmu_domain->smmu) {
2538		ret = arm_smmu_domain_finalise(smmu_domain, smmu);
2539	} else if (smmu_domain->smmu != smmu)
2540		ret = -EINVAL;
2541
2542	mutex_unlock(&smmu_domain->init_mutex);
2543	if (ret)
2544		return ret;
2545
2546	/*
2547	 * Prevent arm_smmu_share_asid() from trying to change the ASID
2548	 * of either the old or new domain while we are working on it.
2549	 * This allows the STE and the smmu_domain->devices list to
2550	 * be inconsistent during this routine.
2551	 */
2552	mutex_lock(&arm_smmu_asid_lock);
2553
2554	arm_smmu_detach_dev(master);
2555
2556	master->ats_enabled = arm_smmu_ats_supported(master);
2557
2558	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2559	list_add(&master->domain_head, &smmu_domain->devices);
2560	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2561
2562	switch (smmu_domain->stage) {
2563	case ARM_SMMU_DOMAIN_S1:
2564		if (!master->cd_table.cdtab) {
2565			ret = arm_smmu_alloc_cd_tables(master);
2566			if (ret)
2567				goto out_list_del;
2568		} else {
2569			/*
2570			 * arm_smmu_write_ctx_desc() relies on the entry being
2571			 * invalid to work, so clear any existing entry.
2572			 */
2573			ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID,
2574						      NULL);
2575			if (ret)
2576				goto out_list_del;
2577		}
2578
2579		ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
2580		if (ret)
2581			goto out_list_del;
2582
2583		arm_smmu_make_cdtable_ste(&target, master);
2584		arm_smmu_install_ste_for_dev(master, &target);
2585		break;
2586	case ARM_SMMU_DOMAIN_S2:
2587		arm_smmu_make_s2_domain_ste(&target, master, smmu_domain);
2588		arm_smmu_install_ste_for_dev(master, &target);
2589		if (master->cd_table.cdtab)
2590			arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID,
2591						      NULL);
2592		break;
2593	}
2594
2595	arm_smmu_enable_ats(master, smmu_domain);
2596	goto out_unlock;
2597
2598out_list_del:
2599	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2600	list_del_init(&master->domain_head);
2601	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2602
2603out_unlock:
2604	mutex_unlock(&arm_smmu_asid_lock);
2605	return ret;
2606}
2607
2608static int arm_smmu_attach_dev_ste(struct device *dev,
2609				   struct arm_smmu_ste *ste)
2610{
2611	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2612
2613	if (arm_smmu_master_sva_enabled(master))
2614		return -EBUSY;
2615
2616	/*
2617	 * Do not allow any ASID to be changed while we are working on the STE,
2618	 * otherwise we could miss invalidations.
2619	 */
2620	mutex_lock(&arm_smmu_asid_lock);
2621
2622	/*
2623	 * The SMMU does not support enabling ATS with bypass/abort. When the
2624	 * STE is in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests
2625	 * and Translated transactions are denied as though ATS is disabled for
2626	 * the stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
2627	 * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
2628	 */
2629	arm_smmu_detach_dev(master);
2630
2631	arm_smmu_install_ste_for_dev(master, ste);
2632	mutex_unlock(&arm_smmu_asid_lock);
2633
2634	/*
2635	 * This has to be done after removing the master from the
2636	 * arm_smmu_domain->devices to avoid races updating the same context
2637	 * descriptor from arm_smmu_share_asid().
2638	 */
2639	if (master->cd_table.cdtab)
2640		arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL);
2641	return 0;
2642}
2643
2644static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
2645					struct device *dev)
2646{
2647	struct arm_smmu_ste ste;
2648	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2649
2650	arm_smmu_make_bypass_ste(master->smmu, &ste);
2651	return arm_smmu_attach_dev_ste(dev, &ste);
2652}
2653
2654static const struct iommu_domain_ops arm_smmu_identity_ops = {
2655	.attach_dev = arm_smmu_attach_dev_identity,
2656};
2657
2658static struct iommu_domain arm_smmu_identity_domain = {
2659	.type = IOMMU_DOMAIN_IDENTITY,
2660	.ops = &arm_smmu_identity_ops,
2661};
2662
2663static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
2664					struct device *dev)
2665{
2666	struct arm_smmu_ste ste;
2667
2668	arm_smmu_make_abort_ste(&ste);
2669	return arm_smmu_attach_dev_ste(dev, &ste);
2670}
2671
2672static const struct iommu_domain_ops arm_smmu_blocked_ops = {
2673	.attach_dev = arm_smmu_attach_dev_blocked,
2674};
2675
2676static struct iommu_domain arm_smmu_blocked_domain = {
2677	.type = IOMMU_DOMAIN_BLOCKED,
2678	.ops = &arm_smmu_blocked_ops,
2679};
2680
2681static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2682			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2683			      int prot, gfp_t gfp, size_t *mapped)
2684{
2685	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2686
2687	if (!ops)
2688		return -ENODEV;
2689
2690	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2691}
2692
2693static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2694				   size_t pgsize, size_t pgcount,
2695				   struct iommu_iotlb_gather *gather)
2696{
2697	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2698	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2699
2700	if (!ops)
2701		return 0;
2702
2703	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2704}
2705
2706static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2707{
2708	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2709
2710	if (smmu_domain->smmu)
2711		arm_smmu_tlb_inv_context(smmu_domain);
2712}
2713
2714static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2715				struct iommu_iotlb_gather *gather)
2716{
2717	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2718
2719	if (!gather->pgsize)
2720		return;
2721
2722	arm_smmu_tlb_inv_range_domain(gather->start,
2723				      gather->end - gather->start + 1,
2724				      gather->pgsize, true, smmu_domain);
2725}
2726
2727static phys_addr_t
2728arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2729{
2730	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2731
2732	if (!ops)
2733		return 0;
2734
2735	return ops->iova_to_phys(ops, iova);
2736}
2737
2738static struct platform_driver arm_smmu_driver;
2739
2740static
2741struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2742{
2743	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2744							  fwnode);
2745	put_device(dev);
2746	return dev ? dev_get_drvdata(dev) : NULL;
2747}
2748
2749static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2750{
2751	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2752
2753	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2754		limit *= 1UL << STRTAB_SPLIT;
2755
2756	return sid < limit;
2757}
2758
2759static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2760{
2761	/* Check the SIDs are in range of the SMMU and our stream table */
2762	if (!arm_smmu_sid_in_range(smmu, sid))
2763		return -ERANGE;
2764
2765	/* Ensure l2 strtab is initialised */
2766	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2767		return arm_smmu_init_l2_strtab(smmu, sid);
2768
2769	return 0;
2770}
2771
2772static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2773				  struct arm_smmu_master *master)
2774{
2775	int i;
2776	int ret = 0;
2777	struct arm_smmu_stream *new_stream, *cur_stream;
2778	struct rb_node **new_node, *parent_node = NULL;
2779	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2780
2781	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2782				  GFP_KERNEL);
2783	if (!master->streams)
2784		return -ENOMEM;
2785	master->num_streams = fwspec->num_ids;
2786
2787	mutex_lock(&smmu->streams_mutex);
2788	for (i = 0; i < fwspec->num_ids; i++) {
2789		u32 sid = fwspec->ids[i];
2790
2791		new_stream = &master->streams[i];
2792		new_stream->id = sid;
2793		new_stream->master = master;
2794
2795		ret = arm_smmu_init_sid_strtab(smmu, sid);
2796		if (ret)
2797			break;
2798
2799		/* Insert into SID tree */
2800		new_node = &(smmu->streams.rb_node);
2801		while (*new_node) {
2802			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2803					      node);
2804			parent_node = *new_node;
2805			if (cur_stream->id > new_stream->id) {
2806				new_node = &((*new_node)->rb_left);
2807			} else if (cur_stream->id < new_stream->id) {
2808				new_node = &((*new_node)->rb_right);
2809			} else {
2810				dev_warn(master->dev,
2811					 "stream %u already in tree\n",
2812					 cur_stream->id);
2813				ret = -EINVAL;
2814				break;
2815			}
2816		}
2817		if (ret)
2818			break;
2819
2820		rb_link_node(&new_stream->node, parent_node, new_node);
2821		rb_insert_color(&new_stream->node, &smmu->streams);
2822	}
2823
2824	if (ret) {
2825		for (i--; i >= 0; i--)
2826			rb_erase(&master->streams[i].node, &smmu->streams);
2827		kfree(master->streams);
2828	}
2829	mutex_unlock(&smmu->streams_mutex);
2830
2831	return ret;
2832}
2833
2834static void arm_smmu_remove_master(struct arm_smmu_master *master)
2835{
2836	int i;
2837	struct arm_smmu_device *smmu = master->smmu;
2838	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2839
2840	if (!smmu || !master->streams)
2841		return;
2842
2843	mutex_lock(&smmu->streams_mutex);
2844	for (i = 0; i < fwspec->num_ids; i++)
2845		rb_erase(&master->streams[i].node, &smmu->streams);
2846	mutex_unlock(&smmu->streams_mutex);
2847
2848	kfree(master->streams);
2849}
2850
2851static struct iommu_ops arm_smmu_ops;
2852
2853static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2854{
2855	int ret;
2856	struct arm_smmu_device *smmu;
2857	struct arm_smmu_master *master;
2858	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2859
2860	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2861		return ERR_PTR(-EBUSY);
2862
2863	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2864	if (!smmu)
2865		return ERR_PTR(-ENODEV);
2866
2867	master = kzalloc(sizeof(*master), GFP_KERNEL);
2868	if (!master)
2869		return ERR_PTR(-ENOMEM);
2870
2871	master->dev = dev;
2872	master->smmu = smmu;
2873	INIT_LIST_HEAD(&master->bonds);
2874	INIT_LIST_HEAD(&master->domain_head);
2875	dev_iommu_priv_set(dev, master);
2876
2877	ret = arm_smmu_insert_master(smmu, master);
2878	if (ret)
2879		goto err_free_master;
2880
2881	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2882	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2883
2884	/*
2885	 * Note that PASID must be enabled before, and disabled after ATS:
2886	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2887	 *
2888	 *   Behavior is undefined if this bit is Set and the value of the PASID
2889	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2890	 *   are changed.
2891	 */
2892	arm_smmu_enable_pasid(master);
2893
2894	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2895		master->ssid_bits = min_t(u8, master->ssid_bits,
2896					  CTXDESC_LINEAR_CDMAX);
2897
2898	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2899	     device_property_read_bool(dev, "dma-can-stall")) ||
2900	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2901		master->stall_enabled = true;
2902
2903	return &smmu->iommu;
2904
2905err_free_master:
2906	kfree(master);
2907	return ERR_PTR(ret);
2908}
2909
2910static void arm_smmu_release_device(struct device *dev)
2911{
2912	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2913
2914	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2915		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2916
2917	/* Put the STE back to what arm_smmu_init_strtab() sets */
2918	if (disable_bypass && !dev->iommu->require_direct)
2919		arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
2920	else
2921		arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev);
2922
2923	arm_smmu_disable_pasid(master);
2924	arm_smmu_remove_master(master);
2925	if (master->cd_table.cdtab)
2926		arm_smmu_free_cd_tables(master);
2927	kfree(master);
2928}
2929
2930static struct iommu_group *arm_smmu_device_group(struct device *dev)
2931{
2932	struct iommu_group *group;
2933
2934	/*
2935	 * We don't support devices sharing stream IDs other than PCI RID
2936	 * aliases, since the necessary ID-to-device lookup becomes rather
2937	 * impractical given a potential sparse 32-bit stream ID space.
2938	 */
2939	if (dev_is_pci(dev))
2940		group = pci_device_group(dev);
2941	else
2942		group = generic_device_group(dev);
2943
2944	return group;
2945}
2946
2947static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2948{
2949	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2950	int ret = 0;
2951
2952	mutex_lock(&smmu_domain->init_mutex);
2953	if (smmu_domain->smmu)
2954		ret = -EPERM;
2955	else
2956		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2957	mutex_unlock(&smmu_domain->init_mutex);
2958
2959	return ret;
2960}
2961
2962static int arm_smmu_of_xlate(struct device *dev,
2963			     const struct of_phandle_args *args)
2964{
2965	return iommu_fwspec_add_ids(dev, args->args, 1);
2966}
2967
2968static void arm_smmu_get_resv_regions(struct device *dev,
2969				      struct list_head *head)
2970{
2971	struct iommu_resv_region *region;
2972	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2973
2974	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2975					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
2976	if (!region)
2977		return;
2978
2979	list_add_tail(&region->list, head);
2980
2981	iommu_dma_get_resv_regions(dev, head);
2982}
2983
2984static int arm_smmu_dev_enable_feature(struct device *dev,
2985				       enum iommu_dev_features feat)
2986{
2987	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2988
2989	if (!master)
2990		return -ENODEV;
2991
2992	switch (feat) {
2993	case IOMMU_DEV_FEAT_IOPF:
2994		if (!arm_smmu_master_iopf_supported(master))
2995			return -EINVAL;
2996		if (master->iopf_enabled)
2997			return -EBUSY;
2998		master->iopf_enabled = true;
2999		return 0;
3000	case IOMMU_DEV_FEAT_SVA:
3001		if (!arm_smmu_master_sva_supported(master))
3002			return -EINVAL;
3003		if (arm_smmu_master_sva_enabled(master))
3004			return -EBUSY;
3005		return arm_smmu_master_enable_sva(master);
3006	default:
3007		return -EINVAL;
3008	}
3009}
3010
3011static int arm_smmu_dev_disable_feature(struct device *dev,
3012					enum iommu_dev_features feat)
3013{
3014	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
3015
3016	if (!master)
3017		return -EINVAL;
3018
3019	switch (feat) {
3020	case IOMMU_DEV_FEAT_IOPF:
3021		if (!master->iopf_enabled)
3022			return -EINVAL;
3023		if (master->sva_enabled)
3024			return -EBUSY;
3025		master->iopf_enabled = false;
3026		return 0;
3027	case IOMMU_DEV_FEAT_SVA:
3028		if (!arm_smmu_master_sva_enabled(master))
3029			return -EINVAL;
3030		return arm_smmu_master_disable_sva(master);
3031	default:
3032		return -EINVAL;
3033	}
3034}
3035
3036/*
3037 * The HiSilicon PCIe tune and trace device can be used to trace TLP headers
3038 * on the PCIe link and save the data to memory by DMA. The hardware is
3039 * restricted to using identity mapping only.
3040 */
3041#define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
3042					 (pdev)->device == 0xa12e)
3043
3044static int arm_smmu_def_domain_type(struct device *dev)
3045{
3046	if (dev_is_pci(dev)) {
3047		struct pci_dev *pdev = to_pci_dev(dev);
3048
3049		if (IS_HISI_PTT_DEVICE(pdev))
3050			return IOMMU_DOMAIN_IDENTITY;
3051	}
3052
3053	return 0;
3054}
3055
3056static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
3057{
3058	struct iommu_domain *domain;
3059
3060	domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
3061	if (WARN_ON(IS_ERR(domain)) || !domain)
3062		return;
3063
3064	arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
3065}
3066
3067static struct iommu_ops arm_smmu_ops = {
3068	.identity_domain	= &arm_smmu_identity_domain,
3069	.blocked_domain		= &arm_smmu_blocked_domain,
3070	.capable		= arm_smmu_capable,
3071	.domain_alloc		= arm_smmu_domain_alloc,
3072	.domain_alloc_paging    = arm_smmu_domain_alloc_paging,
3073	.probe_device		= arm_smmu_probe_device,
3074	.release_device		= arm_smmu_release_device,
3075	.device_group		= arm_smmu_device_group,
3076	.of_xlate		= arm_smmu_of_xlate,
3077	.get_resv_regions	= arm_smmu_get_resv_regions,
3078	.remove_dev_pasid	= arm_smmu_remove_dev_pasid,
3079	.dev_enable_feat	= arm_smmu_dev_enable_feature,
3080	.dev_disable_feat	= arm_smmu_dev_disable_feature,
3081	.page_response		= arm_smmu_page_response,
3082	.def_domain_type	= arm_smmu_def_domain_type,
3083	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
3084	.owner			= THIS_MODULE,
3085	.default_domain_ops = &(const struct iommu_domain_ops) {
3086		.attach_dev		= arm_smmu_attach_dev,
3087		.map_pages		= arm_smmu_map_pages,
3088		.unmap_pages		= arm_smmu_unmap_pages,
3089		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
3090		.iotlb_sync		= arm_smmu_iotlb_sync,
3091		.iova_to_phys		= arm_smmu_iova_to_phys,
3092		.enable_nesting		= arm_smmu_enable_nesting,
3093		.free			= arm_smmu_domain_free,
3094	}
3095};
3096
3097/* Probing and initialisation functions */
3098static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
3099				   struct arm_smmu_queue *q,
3100				   void __iomem *page,
3101				   unsigned long prod_off,
3102				   unsigned long cons_off,
3103				   size_t dwords, const char *name)
3104{
3105	size_t qsz;
3106
3107	do {
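		/*
		 * Queue size in bytes: 2^max_n_shift entries of 'dwords'
		 * 64-bit words each (e.g. 256 command queue entries at 2
		 * dwords apiece need 4KiB).
		 */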
3108		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
3109		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
3110					      GFP_KERNEL);
3111		if (q->base || qsz < PAGE_SIZE)
3112			break;
3113
3114		q->llq.max_n_shift--;
3115	} while (1);
3116
3117	if (!q->base) {
3118		dev_err(smmu->dev,
3119			"failed to allocate queue (0x%zx bytes) for %s\n",
3120			qsz, name);
3121		return -ENOMEM;
3122	}
3123
3124	if (!WARN_ON(q->base_dma & (qsz - 1))) {
3125		dev_info(smmu->dev, "allocated %u entries for %s\n",
3126			 1 << q->llq.max_n_shift, name);
3127	}
3128
3129	q->prod_reg	= page + prod_off;
3130	q->cons_reg	= page + cons_off;
3131	q->ent_dwords	= dwords;
3132
3133	q->q_base  = Q_BASE_RWA;
3134	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
3135	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
3136
3137	q->llq.prod = q->llq.cons = 0;
3138	return 0;
3139}
3140
3141static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
3142{
3143	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
3144	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3145
3146	atomic_set(&cmdq->owner_prod, 0);
3147	atomic_set(&cmdq->lock, 0);
3148
3149	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
3150							      GFP_KERNEL);
3151	if (!cmdq->valid_map)
3152		return -ENOMEM;
3153
3154	return 0;
3155}
3156
3157static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3158{
3159	int ret;
3160
3161	/* cmdq */
3162	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
3163				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
3164				      CMDQ_ENT_DWORDS, "cmdq");
3165	if (ret)
3166		return ret;
3167
3168	ret = arm_smmu_cmdq_init(smmu);
3169	if (ret)
3170		return ret;
3171
3172	/* evtq */
3173	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
3174				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
3175				      EVTQ_ENT_DWORDS, "evtq");
3176	if (ret)
3177		return ret;
3178
3179	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
3180	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
3181		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
3182		if (!smmu->evtq.iopf)
3183			return -ENOMEM;
3184	}
3185
3186	/* priq */
3187	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3188		return 0;
3189
3190	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
3191				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
3192				       PRIQ_ENT_DWORDS, "priq");
3193}
3194
3195static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
3196{
3197	unsigned int i;
3198	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3199	void *strtab = smmu->strtab_cfg.strtab;
3200
3201	cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
3202				    sizeof(*cfg->l1_desc), GFP_KERNEL);
3203	if (!cfg->l1_desc)
3204		return -ENOMEM;
3205
3206	for (i = 0; i < cfg->num_l1_ents; ++i) {
3207		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3208		strtab += STRTAB_L1_DESC_DWORDS << 3;
3209	}
3210
3211	return 0;
3212}
3213
3214static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3215{
3216	void *strtab;
3217	u64 reg;
3218	u32 size, l1size;
3219	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3220
3221	/* Calculate the L1 size, capped to the SIDSIZE. */
3222	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3223	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3224	cfg->num_l1_ents = 1 << size;
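	/*
	 * e.g. assuming the usual constants (STRTAB_L1_SZ_SHIFT == 20,
	 * single-dword L1 descriptors, STRTAB_SPLIT == 8), the L1 table is
	 * capped at 1 << 17 descriptors, covering at most 25 bits of SID,
	 * hence the warning below for larger SID spaces.
	 */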
3225
3226	size += STRTAB_SPLIT;
3227	if (size < smmu->sid_bits)
3228		dev_warn(smmu->dev,
3229			 "2-level strtab only covers %u/%u bits of SID\n",
3230			 size, smmu->sid_bits);
3231
3232	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3233	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3234				     GFP_KERNEL);
3235	if (!strtab) {
3236		dev_err(smmu->dev,
3237			"failed to allocate l1 stream table (%u bytes)\n",
3238			l1size);
3239		return -ENOMEM;
3240	}
3241	cfg->strtab = strtab;
3242
3243	/* Configure strtab_base_cfg for 2 levels */
3244	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3245	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3246	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3247	cfg->strtab_base_cfg = reg;
3248
3249	return arm_smmu_init_l1_strtab(smmu);
3250}
3251
3252static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3253{
3254	void *strtab;
3255	u64 reg;
3256	u32 size;
3257	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3258
3259	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
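	/* e.g. 8 SID bits: 256 STEs of 64 bytes each, i.e. a 16KiB table */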
3260	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3261				     GFP_KERNEL);
3262	if (!strtab) {
3263		dev_err(smmu->dev,
3264			"failed to allocate linear stream table (%u bytes)\n",
3265			size);
3266		return -ENOMEM;
3267	}
3268	cfg->strtab = strtab;
3269	cfg->num_l1_ents = 1 << smmu->sid_bits;
3270
3271	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3272	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3273	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3274	cfg->strtab_base_cfg = reg;
3275
3276	arm_smmu_init_initial_stes(smmu, strtab, cfg->num_l1_ents);
3277	return 0;
3278}
3279
3280static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3281{
3282	u64 reg;
3283	int ret;
3284
3285	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3286		ret = arm_smmu_init_strtab_2lvl(smmu);
3287	else
3288		ret = arm_smmu_init_strtab_linear(smmu);
3289
3290	if (ret)
3291		return ret;
3292
3293	/* Set the strtab base address */
3294	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3295	reg |= STRTAB_BASE_RA;
3296	smmu->strtab_cfg.strtab_base = reg;
3297
3298	ida_init(&smmu->vmid_map);
3299
3300	return 0;
3301}
3302
3303static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3304{
3305	int ret;
3306
3307	mutex_init(&smmu->streams_mutex);
3308	smmu->streams = RB_ROOT;
3309
3310	ret = arm_smmu_init_queues(smmu);
3311	if (ret)
3312		return ret;
3313
3314	return arm_smmu_init_strtab(smmu);
3315}
3316
3317static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3318				   unsigned int reg_off, unsigned int ack_off)
3319{
3320	u32 reg;
3321
3322	writel_relaxed(val, smmu->base + reg_off);
3323	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3324					  1, ARM_SMMU_POLL_TIMEOUT_US);
3325}
3326
3327/* GBPA is "special" */
3328static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3329{
3330	int ret;
3331	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3332
3333	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3334					 1, ARM_SMMU_POLL_TIMEOUT_US);
3335	if (ret)
3336		return ret;
3337
3338	reg &= ~clr;
3339	reg |= set;
3340	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3341	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3342					 1, ARM_SMMU_POLL_TIMEOUT_US);
3343
3344	if (ret)
3345		dev_err(smmu->dev, "GBPA not responding to update\n");
3346	return ret;
3347}
3348
3349static void arm_smmu_free_msis(void *data)
3350{
3351	struct device *dev = data;
3352
3353	platform_device_msi_free_irqs_all(dev);
3354}
3355
3356static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3357{
3358	phys_addr_t doorbell;
3359	struct device *dev = msi_desc_to_dev(desc);
3360	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3361	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3362
3363	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3364	doorbell &= MSI_CFG0_ADDR_MASK;
3365
3366	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3367	writel_relaxed(msg->data, smmu->base + cfg[1]);
3368	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3369}
3370
3371static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3372{
3373	int ret, nvec = ARM_SMMU_MAX_MSIS;
3374	struct device *dev = smmu->dev;
3375
3376	/* Clear the MSI address regs */
3377	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3378	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3379
3380	if (smmu->features & ARM_SMMU_FEAT_PRI)
3381		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3382	else
3383		nvec--;
3384
3385	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3386		return;
3387
3388	if (!dev->msi.domain) {
3389		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3390		return;
3391	}
3392
3393	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3394	ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3395	if (ret) {
3396		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3397		return;
3398	}
3399
3400	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3401	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3402	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3403
3404	/* Add callback to free MSIs on teardown */
3405	devm_add_action(dev, arm_smmu_free_msis, dev);
3406}
3407
3408static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3409{
3410	int irq, ret;
3411
3412	arm_smmu_setup_msis(smmu);
3413
3414	/* Request interrupt lines */
3415	irq = smmu->evtq.q.irq;
3416	if (irq) {
3417		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3418						arm_smmu_evtq_thread,
3419						IRQF_ONESHOT,
3420						"arm-smmu-v3-evtq", smmu);
3421		if (ret < 0)
3422			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3423	} else {
3424		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3425	}
3426
3427	irq = smmu->gerr_irq;
3428	if (irq) {
3429		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3430				       0, "arm-smmu-v3-gerror", smmu);
3431		if (ret < 0)
3432			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3433	} else {
3434		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3435	}
3436
3437	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3438		irq = smmu->priq.q.irq;
3439		if (irq) {
3440			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3441							arm_smmu_priq_thread,
3442							IRQF_ONESHOT,
3443							"arm-smmu-v3-priq",
3444							smmu);
3445			if (ret < 0)
3446				dev_warn(smmu->dev,
3447					 "failed to enable priq irq\n");
3448		} else {
3449			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3450		}
3451	}
3452}
3453
3454static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3455{
3456	int ret, irq;
3457	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3458
3459	/* Disable IRQs first */
3460	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3461				      ARM_SMMU_IRQ_CTRLACK);
3462	if (ret) {
3463		dev_err(smmu->dev, "failed to disable irqs\n");
3464		return ret;
3465	}
3466
3467	irq = smmu->combined_irq;
3468	if (irq) {
3469		/*
3470		 * The Cavium ThunderX2 implementation doesn't support unique irq
3471		 * lines, so use a single irq line for all the SMMUv3 interrupts.
3472		 */
3473		ret = devm_request_threaded_irq(smmu->dev, irq,
3474					arm_smmu_combined_irq_handler,
3475					arm_smmu_combined_irq_thread,
3476					IRQF_ONESHOT,
3477					"arm-smmu-v3-combined-irq", smmu);
3478		if (ret < 0)
3479			dev_warn(smmu->dev, "failed to enable combined irq\n");
3480	} else
3481		arm_smmu_setup_unique_irqs(smmu);
3482
3483	if (smmu->features & ARM_SMMU_FEAT_PRI)
3484		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3485
3486	/* Enable interrupt generation on the SMMU */
3487	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3488				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3489	if (ret)
3490		dev_warn(smmu->dev, "failed to enable irqs\n");
3491
3492	return 0;
3493}
3494
3495static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3496{
3497	int ret;
3498
3499	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3500	if (ret)
3501		dev_err(smmu->dev, "failed to clear cr0\n");
3502
3503	return ret;
3504}
3505
3506static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3507{
3508	int ret;
3509	u32 reg, enables;
3510	struct arm_smmu_cmdq_ent cmd;
3511
3512	/* Clear CR0 and sync (disables SMMU and queue processing) */
3513	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3514	if (reg & CR0_SMMUEN) {
3515		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3516		WARN_ON(is_kdump_kernel() && !disable_bypass);
3517		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3518	}
3519
3520	ret = arm_smmu_device_disable(smmu);
3521	if (ret)
3522		return ret;
3523
3524	/* CR1 (table and queue memory attributes) */
3525	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3526	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3527	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3528	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3529	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3530	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3531	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3532
3533	/* CR2 (random crap) */
3534	reg = CR2_PTM | CR2_RECINVSID;
3535
3536	if (smmu->features & ARM_SMMU_FEAT_E2H)
3537		reg |= CR2_E2H;
3538
3539	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3540
	/* Stream table */
	writeq_relaxed(smmu->strtab_cfg.strtab_base,
		       smmu->base + ARM_SMMU_STRTAB_BASE);
	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);

	/* Command queue */
	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);

	enables = CR0_CMDQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable command queue\n");
		return ret;
	}

	/* Invalidate any cached configuration */
	cmd.opcode = CMDQ_OP_CFGI_ALL;
	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);

	/* Invalidate any stale TLB entries */
	if (smmu->features & ARM_SMMU_FEAT_HYP) {
		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
	}

	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);

	/* Event queue */
	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);

	enables |= CR0_EVTQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable event queue\n");
		return ret;
	}

	/* PRI queue */
	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		writeq_relaxed(smmu->priq.q.q_base,
			       smmu->base + ARM_SMMU_PRIQ_BASE);
		writel_relaxed(smmu->priq.q.llq.prod,
			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
		writel_relaxed(smmu->priq.q.llq.cons,
			       smmu->page1 + ARM_SMMU_PRIQ_CONS);

		enables |= CR0_PRIQEN;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable PRI queue\n");
			return ret;
		}
	}

	if (smmu->features & ARM_SMMU_FEAT_ATS) {
		enables |= CR0_ATSCHK;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable ATS check\n");
			return ret;
		}
	}

	ret = arm_smmu_setup_irqs(smmu);
	if (ret) {
		dev_err(smmu->dev, "failed to setup irqs\n");
		return ret;
	}

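	/*
	 * Leave the event and PRI queues disabled in a kdump kernel: the
	 * crash kernel has no use for faults raised by devices that the
	 * previous kernel left running.
	 */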
	if (is_kdump_kernel())
		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);

	/* Enable the SMMU interface, or ensure bypass */
	if (!bypass || disable_bypass) {
		enables |= CR0_SMMUEN;
	} else {
		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
		if (ret)
			return ret;
	}
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable SMMU interface\n");
		return ret;
	}

	return 0;
}

#define IIDR_IMPLEMENTER_ARM		0x43b
#define IIDR_PRODUCTID_ARM_MMU_600	0x483
#define IIDR_PRODUCTID_ARM_MMU_700	0x487

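/*
 * Apply implementation-specific errata workarounds based on the IIDR
 * identification fields, by masking out the affected features.
 */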
static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
	unsigned int implementer, productid, variant, revision;

	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
	productid = FIELD_GET(IIDR_PRODUCTID, reg);
	variant = FIELD_GET(IIDR_VARIANT, reg);
	revision = FIELD_GET(IIDR_REVISION, reg);

	switch (implementer) {
	case IIDR_IMPLEMENTER_ARM:
		switch (productid) {
		case IIDR_PRODUCTID_ARM_MMU_600:
			/* Arm erratum 1076982 */
			if (variant == 0 && revision <= 2)
				smmu->features &= ~ARM_SMMU_FEAT_SEV;
			/* Arm erratum 1209401 */
			if (variant < 2)
				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
			break;
		case IIDR_PRODUCTID_ARM_MMU_700:
			/* Arm erratum 2812531 */
			smmu->features &= ~ARM_SMMU_FEAT_BTM;
			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
			/* Arm errata 2268618, 2812531 */
			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
			break;
		}
		break;
	}
}

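/*
 * Read the ID registers (IDR0/1/3/5) to discover the supported features,
 * queue sizes, StreamID/SubstreamID widths, page sizes and address sizes.
 */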
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;

	/* IDR0 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);

	/* 2-level structures */
	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;

	if (reg & IDR0_CD2L)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;

	/*
	 * Translation table endianness.
	 * We currently require the same endianness as the CPU, but this
	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
	 */
	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
	case IDR0_TTENDIAN_MIXED:
		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
		break;
#ifdef __BIG_ENDIAN
	case IDR0_TTENDIAN_BE:
		smmu->features |= ARM_SMMU_FEAT_TT_BE;
		break;
#else
	case IDR0_TTENDIAN_LE:
		smmu->features |= ARM_SMMU_FEAT_TT_LE;
		break;
#endif
	default:
		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
		return -ENXIO;
	}

	/* Boolean feature flags */
	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
		smmu->features |= ARM_SMMU_FEAT_PRI;

	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
		smmu->features |= ARM_SMMU_FEAT_ATS;

	if (reg & IDR0_SEV)
		smmu->features |= ARM_SMMU_FEAT_SEV;

	if (reg & IDR0_MSI) {
		smmu->features |= ARM_SMMU_FEAT_MSI;
		if (coherent && !disable_msipolling)
			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
	}

	if (reg & IDR0_HYP) {
		smmu->features |= ARM_SMMU_FEAT_HYP;
		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
			smmu->features |= ARM_SMMU_FEAT_E2H;
	}

	/*
	 * The coherency feature as set by FW is used in preference to the ID
	 * register, but warn on mismatch.
	 */
	if (!!(reg & IDR0_COHACC) != coherent)
		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
			 coherent ? "true" : "false");

	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
	case IDR0_STALL_MODEL_FORCE:
		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
		fallthrough;
	case IDR0_STALL_MODEL_STALL:
		smmu->features |= ARM_SMMU_FEAT_STALLS;
	}

	if (reg & IDR0_S1P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;

	if (reg & IDR0_S2P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;

	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
		dev_err(smmu->dev, "no translation support!\n");
		return -ENXIO;
	}

	/* We only support the AArch64 table format at present */
	switch (FIELD_GET(IDR0_TTF, reg)) {
	case IDR0_TTF_AARCH32_64:
		smmu->ias = 40;
		fallthrough;
	case IDR0_TTF_AARCH64:
		break;
	default:
		dev_err(smmu->dev, "AArch64 table format not supported!\n");
		return -ENXIO;
	}

	/* ASID/VMID sizes */
	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;

	/* IDR1 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
		dev_err(smmu->dev, "embedded implementation not supported\n");
		return -ENXIO;
	}

	if (reg & IDR1_ATTR_TYPES_OVR)
		smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;

	/* Queue sizes, capped to ensure natural alignment */
	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_CMDQS, reg));
	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
		/*
		 * We don't support splitting up batches, so one batch of
		 * commands plus an extra sync needs to fit inside the command
		 * queue. There's also no way we can handle the weird alignment
		 * restrictions on the base pointer for a unit-length queue.
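		 * For example, with CMDQ_BATCH_ENTRIES equal to BITS_PER_LONG
		 * (64 on arm64), the queue must provide at least 128 entries,
		 * i.e. max_n_shift >= 7.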
		 */
		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
			CMDQ_BATCH_ENTRIES);
		return -ENXIO;
	}

	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_EVTQS, reg));
	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_PRIQS, reg));

	/* SID/SSID sizes */
	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;

	/*
	 * If the SMMU supports fewer bits than would fill a single L2 stream
	 * table, use a linear table instead.
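	 * (With STRTAB_SPLIT == 8, such a linear table holds at most 256 STEs.)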
	 */
	if (smmu->sid_bits <= STRTAB_SPLIT)
		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;

	/* IDR3 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
	if (FIELD_GET(IDR3_RIL, reg))
		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;

	/* IDR5 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);

	/* Maximum number of outstanding stalls */
	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);

	/* Page sizes */
	if (reg & IDR5_GRAN64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
	if (reg & IDR5_GRAN16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (reg & IDR5_GRAN4K)
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;

	/* Input address size */
	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
		smmu->features |= ARM_SMMU_FEAT_VAX;

	/* Output address size */
	switch (FIELD_GET(IDR5_OAS, reg)) {
	case IDR5_OAS_32_BIT:
		smmu->oas = 32;
		break;
	case IDR5_OAS_36_BIT:
		smmu->oas = 36;
		break;
	case IDR5_OAS_40_BIT:
		smmu->oas = 40;
		break;
	case IDR5_OAS_42_BIT:
		smmu->oas = 42;
		break;
	case IDR5_OAS_44_BIT:
		smmu->oas = 44;
		break;
	case IDR5_OAS_52_BIT:
		smmu->oas = 52;
		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
		break;
	default:
		dev_info(smmu->dev,
			"unknown output address size. Truncating to 48-bit\n");
		fallthrough;
	case IDR5_OAS_48_BIT:
		smmu->oas = 48;
	}

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;

	/* Set the DMA mask for our table walker */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	smmu->ias = max(smmu->ias, smmu->oas);

	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu->features |= ARM_SMMU_FEAT_NESTING;

	arm_smmu_device_iidr_probe(smmu);

	if (arm_smmu_sva_supported(smmu))
		smmu->features |= ARM_SMMU_FEAT_SVA;

	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
		 smmu->ias, smmu->oas, smmu->features);
	return 0;
}

#ifdef CONFIG_ACPI
static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
{
	switch (model) {
	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
		break;
	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
		break;
	}

	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
}

static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct acpi_iort_smmu_v3 *iort_smmu;
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node;

	node = *(struct acpi_iort_node **)dev_get_platdata(dev);

	/* Retrieve SMMUv3 specific data */
	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;

	acpi_smmu_get_options(iort_smmu->model, smmu);

	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	return 0;
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif

static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	struct device *dev = &pdev->dev;
	u32 cells;
	int ret = -EINVAL;

	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
		dev_err(dev, "missing #iommu-cells property\n");
	else if (cells != 1)
		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
	else
		ret = 0;

	parse_driver_options(smmu);

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	return ret;
}

static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
{
	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
		return SZ_64K;
	else
		return SZ_128K;
}

static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
				      resource_size_t size)
{
	struct resource res = DEFINE_RES_MEM(start, size);

	return devm_ioremap_resource(dev, &res);
}

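/*
 * IORT RMR nodes describe StreamIDs with memory regions that firmware has
 * already set up for DMA. Install bypass STEs for them so that this traffic
 * is not aborted once the SMMU is enabled.
 */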
static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
{
	struct list_head rmr_list;
	struct iommu_resv_region *e;

	INIT_LIST_HEAD(&rmr_list);
	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);

	list_for_each_entry(e, &rmr_list, list) {
		struct iommu_iort_rmr_data *rmr;
		int ret, i;

		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
		for (i = 0; i < rmr->num_sids; i++) {
			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
			if (ret) {
				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
					rmr->sids[i]);
				continue;
			}

			/*
			 * STE table is not programmed to HW, see
			 * arm_smmu_initial_bypass_stes()
			 */
			arm_smmu_make_bypass_ste(smmu,
				arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
		}
	}

	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
}

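/*
 * Probe the firmware description (DT or ACPI IORT), map the register pages,
 * discover the hardware features, allocate the in-memory structures, install
 * any RMR bypass STEs and reset the device before registering with the IOMMU
 * core.
 */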
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	int irq, ret;
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	bool bypass;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu)
		return -ENOMEM;
	smmu->dev = dev;

	if (dev->of_node) {
		ret = arm_smmu_device_dt_probe(pdev, smmu);
	} else {
		ret = arm_smmu_device_acpi_probe(pdev, smmu);
		if (ret == -ENODEV)
			return ret;
	}

	/* Set bypass mode according to firmware probing result */
	bypass = !!ret;

	/* Base address */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -EINVAL;
	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
		dev_err(dev, "MMIO region too small (%pr)\n", res);
		return -EINVAL;
	}
	ioaddr = res->start;

	/*
	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
	 * the PMCG registers which are reserved by the PMU driver.
	 */
	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);

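	/*
	 * Page 1 (at a 64K offset) contains the EVTQ/PRIQ PROD and CONS
	 * registers. Implementations with ARM_SMMU_OPT_PAGE0_REGS_ONLY expose
	 * them at the equivalent page 0 offsets instead, so alias page1 to
	 * the base mapping in that case.
	 */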
	if (arm_smmu_resource_size(smmu) > SZ_64K) {
		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
					       ARM_SMMU_REG_SZ);
		if (IS_ERR(smmu->page1))
			return PTR_ERR(smmu->page1);
	} else {
		smmu->page1 = smmu->base;
	}

	/* Interrupt lines */

	irq = platform_get_irq_byname_optional(pdev, "combined");
	if (irq > 0) {
		smmu->combined_irq = irq;
	} else {
		irq = platform_get_irq_byname_optional(pdev, "eventq");
		if (irq > 0)
			smmu->evtq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "priq");
		if (irq > 0)
			smmu->priq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "gerror");
		if (irq > 0)
			smmu->gerr_irq = irq;
	}
	/* Probe the h/w */
	ret = arm_smmu_device_hw_probe(smmu);
	if (ret)
		return ret;

	/* Initialise in-memory data structures */
	ret = arm_smmu_init_structures(smmu);
	if (ret)
		return ret;

	/* Record our private device structure */
	platform_set_drvdata(pdev, smmu);

	/* Check for RMRs and install bypass STEs if any */
	arm_smmu_rmr_install_bypass_ste(smmu);

	/* Reset the device */
	ret = arm_smmu_device_reset(smmu, bypass);
	if (ret)
		return ret;

	/* And we're up. Go go go! */
	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
				     "smmu3.%pa", &ioaddr);
	if (ret)
		return ret;

	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
	if (ret) {
		dev_err(dev, "Failed to register iommu\n");
		iommu_device_sysfs_remove(&smmu->iommu);
		return ret;
	}

	return 0;
}

static void arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);
	arm_smmu_device_disable(smmu);
	iopf_queue_free(smmu->evtq.iopf);
	ida_destroy(&smmu->vmid_map);
}

static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	arm_smmu_device_disable(smmu);
}

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v3", },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);

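/*
 * Make sure any outstanding SVA mmu_notifier callbacks have finished before
 * the driver text and data can go away.
 */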
static void arm_smmu_driver_unregister(struct platform_driver *drv)
{
	arm_smmu_sva_notifier_synchronize();
	platform_driver_unregister(drv);
}

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu-v3",
		.of_match_table		= arm_smmu_of_match,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.remove_new = arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_driver(arm_smmu_driver, platform_driver_register,
	      arm_smmu_driver_unregister);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu-v3");
MODULE_LICENSE("GPL v2");