/*	$OpenBSD: cache_loongson2.c,v 1.7 2016/01/05 05:27:54 visa Exp $	*/

/*
 * Copyright (c) 2009, 2012 Miodrag Vallat.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Cache handling code for Loongson 2E and 2F processors.
 * This code could be made to work on 2C by not hardcoding the number of
 * cache ways.
 *
 * 2E and 2F caches are:
 * - L1 I$ is 4-way, VIPT, 32 bytes/line, 64KB total
 * - L1 D$ is 4-way, VIPT, write-back, 32 bytes/line, 64KB total
 * - L2 is 4-way, PIPT, write-back, 32 bytes/line, 512KB total
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <mips64/cache.h>
#include <machine/cpu.h>

#include <uvm/uvm_extern.h>

/* L1 cache operations */
#define	IndexInvalidate_I	0x00
#define	IndexWBInvalidate_D	0x01
#define	IndexLoadTag_D		0x05
#define	IndexStoreTag_D		0x09
#define	HitInvalidate_D		0x11
#define	HitWBInvalidate_D	0x15
#define	IndexLoadData_D		0x19
#define	IndexStoreData_D	0x1d

/* L2 cache operations */
#define	IndexWBInvalidate_S	0x03
#define	IndexLoadTag_S		0x07
#define	IndexStoreTag_S		0x0b
#define	HitInvalidate_S		0x13
#define	HitWBInvalidate_S	0x17
#define	IndexLoadData_S		0x1b
#define	IndexStoreData_S	0x1f

#define	cache(op,set,addr) \
    __asm__ volatile \
      ("cache %0, %1(%2)" :: "i"(op), "i"(set), "r"(addr) : "memory")
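
/*
 * Illustrative note: the second macro argument becomes the immediate
 * offset of the emitted `cache' instruction, so for instance
 *	cache(HitWBInvalidate_D, 0, va);
 * expands to a single cache instruction with operation code
 * HitWBInvalidate_D (0x15) and offset 0 applied to the register holding
 * va.  The index loops below pass 0..3 as that offset, which is how
 * they step over the four ways of a given cache index.
 */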

static __inline__ void	ls2f_hitinv_primary(vaddr_t, vsize_t);
static __inline__ void	ls2f_hitinv_secondary(vaddr_t, vsize_t);
static __inline__ void	ls2f_hitwbinv_primary(vaddr_t, vsize_t);
static __inline__ void	ls2f_hitwbinv_secondary(vaddr_t, vsize_t);

#define	LS2F_CACHE_LINE	32UL
#define	LS2F_CACHE_WAYS	4UL
#define	LS2F_L1_SIZE		(64UL * 1024UL)
#define	LS2F_L2_SIZE		(512UL * 1024UL)
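
/*
 * For reference: each way of the L1 caches spans 64KB / 4 = 16KB
 * (1 << 14 bytes) and each way of the L2 spans 512KB / 4 = 128KB.
 * The 16KB per-way L1 size matches the 16KB page size used on this
 * platform, which is why no virtual aliasing can occur (see the
 * cache_valias_mask computation below).
 */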

void
Loongson2_ConfigCache(struct cpu_info *ci)
{
	ci->ci_l1inst.size = LS2F_L1_SIZE;
	ci->ci_l1inst.linesize = LS2F_CACHE_LINE;
	ci->ci_l1inst.setsize = LS2F_L1_SIZE / LS2F_CACHE_WAYS;
	ci->ci_l1inst.sets = LS2F_CACHE_WAYS;

	ci->ci_l1data.size = LS2F_L1_SIZE;
	ci->ci_l1data.linesize = LS2F_CACHE_LINE;
	ci->ci_l1data.setsize = LS2F_L1_SIZE / LS2F_CACHE_WAYS;
	ci->ci_l1data.sets = LS2F_CACHE_WAYS;

	ci->ci_l2.size = LS2F_L2_SIZE;
	ci->ci_l2.linesize = LS2F_CACHE_LINE;
	ci->ci_l2.setsize = LS2F_L2_SIZE / LS2F_CACHE_WAYS;
	ci->ci_l2.sets = LS2F_CACHE_WAYS;

	memset(&ci->ci_l3, 0, sizeof(struct cache_info));

	cache_valias_mask = ci->ci_l1inst.setsize & ~PAGE_MASK;

	/* should not happen as we use 16KB pages */
	if (cache_valias_mask != 0) {
		cache_valias_mask |= PAGE_MASK;
		pmap_prefer_mask |= cache_valias_mask;
	}

	ci->ci_SyncCache = Loongson2_SyncCache;
	ci->ci_InvalidateICache = Loongson2_InvalidateICache;
	ci->ci_InvalidateICachePage = Loongson2_InvalidateICachePage;
	ci->ci_SyncICache = Loongson2_SyncICache;
	ci->ci_SyncDCachePage = Loongson2_SyncDCachePage;
	ci->ci_HitSyncDCachePage = Loongson2_SyncDCachePage;
	ci->ci_HitSyncDCache = Loongson2_HitSyncDCache;
	ci->ci_HitInvalidateDCache = Loongson2_HitInvalidateDCache;
	ci->ci_IOSyncDCache = Loongson2_IOSyncDCache;
}
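
/*
 * Illustrative sketch only: machine-independent code reaches these
 * handlers through the per-cpu function pointers initialized above,
 * roughly as
 *
 *	struct cpu_info *ci = curcpu();
 *	ci->ci_SyncICache(ci);
 *	ci->ci_IOSyncDCache(ci, va, len, CACHE_SYNC_W);
 *
 * In practice callers typically go through the MD wrapper macros
 * rather than the raw pointers.
 */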

/*
 * Writeback and invalidate all caches.
 */
void
Loongson2_SyncCache(struct cpu_info *ci)
{
	vaddr_t sva, eva;

	mips_sync();

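	/*
	 * Only a single Index op per line is issued for the I$ below;
	 * as the comment in Loongson2_InvalidateICachePage() notes, I$
	 * maintenance operations act on all ways at once, whereas the
	 * D$ and L2 loops have to touch each of the four ways
	 * explicitly.
	 */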
	sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
	eva = sva + LS2F_L1_SIZE / LS2F_CACHE_WAYS;
	while (sva != eva) {
		cache(IndexInvalidate_I, 0, sva);
		sva += LS2F_CACHE_LINE;
	}

	sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
	eva = sva + LS2F_L1_SIZE / LS2F_CACHE_WAYS;
	while (sva != eva) {
		cache(IndexWBInvalidate_D, 0, sva);
		cache(IndexWBInvalidate_D, 1, sva);
		cache(IndexWBInvalidate_D, 2, sva);
		cache(IndexWBInvalidate_D, 3, sva);
		sva += LS2F_CACHE_LINE;
	}

	sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
	eva = sva + LS2F_L2_SIZE / LS2F_CACHE_WAYS;
	while (sva != eva) {
		cache(IndexWBInvalidate_S, 0, sva);
		cache(IndexWBInvalidate_S, 1, sva);
		cache(IndexWBInvalidate_S, 2, sva);
		cache(IndexWBInvalidate_S, 3, sva);
		sva += LS2F_CACHE_LINE;
	}
}

/*
 * Invalidate I$ for the given range.
 */
void
Loongson2_InvalidateICache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
{
	vaddr_t va, sva, eva;
	vsize_t sz;

	/* extend the range to integral cache lines */
	va = _va & ~(LS2F_CACHE_LINE - 1);
	sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

	sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
	/* keep only the index bits */
	sva |= va & ((1UL << 14) - 1);
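	/*
	 * For reference: (1UL << 14) is the per-way I$ size,
	 * 64KB / 4 ways = 16KB, so the mask above keeps exactly the
	 * bits that index the cache; the XKPHYS base merely provides a
	 * valid, unmapped address through which to issue the Index ops.
	 */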
	eva = sva + sz;
	while (sva != eva) {
		cache(IndexInvalidate_I, 0, sva);
		sva += LS2F_CACHE_LINE;
	}
}

/*
 * Register a given page for I$ invalidation.
 */
void
Loongson2_InvalidateICachePage(struct cpu_info *ci, vaddr_t va)
{
	/*
	 * Since the page size matches the I$ set size, and I$ maintenance
	 * operations always operate on all the sets, all we need to do here
	 * is remember there are postponed flushes.
	 */
	ci->ci_cachepending_l1i = 1;
}

/*
 * Perform postponed I$ invalidation.
 */
void
Loongson2_SyncICache(struct cpu_info *ci)
{
	vaddr_t sva, eva;

	if (ci->ci_cachepending_l1i != 0) {
		/* inline Loongson2_InvalidateICache(ci, 0, PAGE_SIZE); */
		sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
		eva = sva + PAGE_SIZE;
		while (sva != eva) {
			cache(IndexInvalidate_I, 0, sva);
			sva += LS2F_CACHE_LINE;
		}

		ci->ci_cachepending_l1i = 0;
	}
}

/*
 * Writeback D$ for the given page.
 *
 * The index for L1 is the low 14 bits of the virtual address. Since the
 * page size is 2**14 bytes, it is possible to access the page through
 * any valid address.
 */
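/*
 * Worked example, for illustration: with 16KB (1 << 14 byte) pages, any
 * mapped address within the page and its XKPHYS alias used below share
 * the same low 14 bits, hence the same L1 indexes, so flushing through
 * PHYS_TO_XKPHYS(pa, CCA_CACHED) also covers lines brought in through
 * the mapped address.
 */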
void
Loongson2_SyncDCachePage(struct cpu_info *ci, vaddr_t va, paddr_t pa)
{
	vaddr_t sva, eva;

	mips_sync();

	sva = PHYS_TO_XKPHYS(pa, CCA_CACHED);
	eva = sva + PAGE_SIZE;
	for (va = sva; va != eva; va += LS2F_CACHE_LINE)
		cache(HitWBInvalidate_D, 0, va);
	for (va = sva; va != eva; va += LS2F_CACHE_LINE)
		cache(HitWBInvalidate_S, 0, va);
}

/*
 * Writeback D$ for the given range. Range is expected to be currently
 * mapped, allowing the use of `Hit' operations. This is less aggressive
 * than using `Index' operations.
 */
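/*
 * For reference: a `Hit' operation only affects a line if the given
 * address is actually present in the cache, whereas an `Index'
 * operation unconditionally acts on whatever happens to occupy the
 * selected slot, possibly writing back and evicting unrelated data.
 */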

static __inline__ void
ls2f_hitwbinv_primary(vaddr_t va, vsize_t sz)
{
	vaddr_t eva;

	eva = va + sz;
	while (va != eva) {
		cache(HitWBInvalidate_D, 0, va);
		va += LS2F_CACHE_LINE;
	}
}

static __inline__ void
ls2f_hitwbinv_secondary(vaddr_t va, vsize_t sz)
{
	vaddr_t eva;

	eva = va + sz;
	while (va != eva) {
		cache(HitWBInvalidate_S, 0, va);
		va += LS2F_CACHE_LINE;
	}
}

void
Loongson2_HitSyncDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
{
	vaddr_t va;
	vsize_t sz;

	mips_sync();

	/* extend the range to integral cache lines */
	va = _va & ~(LS2F_CACHE_LINE - 1);
	sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

	ls2f_hitwbinv_primary(va, sz);
	ls2f_hitwbinv_secondary(va, sz);
}

/*
 * Invalidate D$ for the given range. Range is expected to be currently
 * mapped, allowing the use of `Hit' operations. This is less aggressive
 * than using `Index' operations.
 */

static __inline__ void
ls2f_hitinv_primary(vaddr_t va, vsize_t sz)
{
	vaddr_t eva;

	eva = va + sz;
	while (va != eva) {
		cache(HitInvalidate_D, 0, va);
		va += LS2F_CACHE_LINE;
	}
}

static __inline__ void
ls2f_hitinv_secondary(vaddr_t va, vsize_t sz)
{
	vaddr_t eva;

	eva = va + sz;
	while (va != eva) {
		cache(HitInvalidate_S, 0, va);
		va += LS2F_CACHE_LINE;
	}
}

void
Loongson2_HitInvalidateDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
{
	vaddr_t va;
	vsize_t sz;

	/* extend the range to integral cache lines */
	va = _va & ~(LS2F_CACHE_LINE - 1);
	sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

	ls2f_hitinv_primary(va, sz);
	ls2f_hitinv_secondary(va, sz);

	mips_sync();
}

/*
 * Backend for bus_dmamap_sync(). Enforce coherency of the given range
 * by performing the necessary cache writeback and/or invalidate
 * operations.
 */
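/*
 * Note on the CACHE_SYNC_R case below: before invalidating ahead of a
 * device-to-memory transfer, cache lines that are only partially
 * covered by the range are written back first, so that valid data
 * sharing those edge lines with the DMA buffer is not lost.
 */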
void
Loongson2_IOSyncDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz, int how)
{
	vaddr_t va;
	vsize_t sz;
	int partial_start, partial_end;

	/* extend the range to integral cache lines */
	va = _va & ~(LS2F_CACHE_LINE - 1);
	sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

	switch (how) {
	case CACHE_SYNC_R:
		/* writeback partial cachelines */
		if (((_va | _sz) & (LS2F_CACHE_LINE - 1)) != 0) {
			partial_start = va != _va;
			partial_end = va + sz != _va + _sz;
		} else {
			partial_start = partial_end = 0;
		}
		if (partial_start) {
			cache(HitWBInvalidate_D, 0, va);
			cache(HitWBInvalidate_S, 0, va);
			va += LS2F_CACHE_LINE;
			sz -= LS2F_CACHE_LINE;
		}
		if (sz != 0 && partial_end) {
			cache(HitWBInvalidate_D, 0, va + sz - LS2F_CACHE_LINE);
			cache(HitWBInvalidate_S, 0, va + sz - LS2F_CACHE_LINE);
			sz -= LS2F_CACHE_LINE;
		}
		ls2f_hitinv_primary(va, sz);
		ls2f_hitinv_secondary(va, sz);
		break;
	case CACHE_SYNC_X:
	case CACHE_SYNC_W:
		ls2f_hitwbinv_primary(va, sz);
		ls2f_hitwbinv_secondary(va, sz);
		break;
	}
}