/*	$OpenBSD: cache_loongson2.c,v 1.8 2021/03/11 11:16:59 jsg Exp $	*/

/*
 * Copyright (c) 2009, 2012 Miodrag Vallat.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Cache handling code for Loongson 2E and 2F processors.
 * This code could be made to work on 2C by not hardcoding the number of
 * cache ways.
 *
 * 2E and 2F caches are:
 * - L1 I$ is 4-way, VIPT, 32 bytes/line, 64KB total
 * - L1 D$ is 4-way, VIPT, write-back, 32 bytes/line, 64KB total
 * - L2 is 4-way, PIPT, write-back, 32 bytes/line, 512KB total
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <mips64/cache.h>
#include <machine/cpu.h>

#include <uvm/uvm_extern.h>

/* L1 cache operations */
#define	IndexInvalidate_I	0x00
#define	IndexWBInvalidate_D	0x01
#define	IndexLoadTag_D		0x05
#define	IndexStoreTag_D		0x09
#define	HitInvalidate_D		0x11
#define	HitWBInvalidate_D	0x15
#define	IndexLoadData_D		0x19
#define	IndexStoreData_D	0x1d

/* L2 cache operations */
#define	IndexWBInvalidate_S	0x03
#define	IndexLoadTag_S		0x07
#define	IndexStoreTag_S		0x0b
#define	HitInvalidate_S		0x13
#define	HitWBInvalidate_S	0x17
#define	IndexLoadData_S		0x1b
#define	IndexStoreData_S	0x1f

#define	cache(op,set,addr) \
	__asm__ volatile \
	    ("cache %0, %1(%2)" :: "i"(op), "i"(set), "r"(addr) : "memory")

static __inline__ void	ls2f_hitinv_primary(vaddr_t, vsize_t);
static __inline__ void	ls2f_hitinv_secondary(vaddr_t, vsize_t);
static __inline__ void	ls2f_hitwbinv_primary(vaddr_t, vsize_t);
static __inline__ void	ls2f_hitwbinv_secondary(vaddr_t, vsize_t);

#define	LS2F_CACHE_LINE	32UL
#define	LS2F_CACHE_WAYS	4UL
#define	LS2F_L1_SIZE	(64UL * 1024UL)
#define	LS2F_L2_SIZE	(512UL * 1024UL)

void
Loongson2_ConfigCache(struct cpu_info *ci)
{
	ci->ci_l1inst.size = LS2F_L1_SIZE;
	ci->ci_l1inst.linesize = LS2F_CACHE_LINE;
	ci->ci_l1inst.setsize = LS2F_L1_SIZE / LS2F_CACHE_WAYS;
	ci->ci_l1inst.sets = LS2F_CACHE_WAYS;

	ci->ci_l1data.size = LS2F_L1_SIZE;
	ci->ci_l1data.linesize = LS2F_CACHE_LINE;
	ci->ci_l1data.setsize = LS2F_L1_SIZE / LS2F_CACHE_WAYS;
	ci->ci_l1data.sets = LS2F_CACHE_WAYS;

	ci->ci_l2.size = LS2F_L2_SIZE;
	ci->ci_l2.linesize = LS2F_CACHE_LINE;
	ci->ci_l2.setsize = LS2F_L2_SIZE / LS2F_CACHE_WAYS;
	ci->ci_l2.sets = LS2F_CACHE_WAYS;

	memset(&ci->ci_l3, 0, sizeof(struct cache_info));

	cache_valias_mask = ci->ci_l1inst.setsize & ~PAGE_MASK;

	/* should not happen as we use 16KB pages */
	if (cache_valias_mask != 0) {
		cache_valias_mask |= PAGE_MASK;
		pmap_prefer_mask |= cache_valias_mask;
	}

	ci->ci_SyncCache = Loongson2_SyncCache;
	ci->ci_InvalidateICache = Loongson2_InvalidateICache;
	ci->ci_InvalidateICachePage = Loongson2_InvalidateICachePage;
	ci->ci_SyncICache = Loongson2_SyncICache;
	ci->ci_SyncDCachePage = Loongson2_SyncDCachePage;
	ci->ci_HitSyncDCachePage = Loongson2_SyncDCachePage;
	ci->ci_HitSyncDCache = Loongson2_HitSyncDCache;
	ci->ci_HitInvalidateDCache = Loongson2_HitInvalidateDCache;
	ci->ci_IOSyncDCache = Loongson2_IOSyncDCache;
}
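/*
 * Illustrative sketch, not part of the build: why the aliasing check
 * above is expected to find nothing.  The helper name below is
 * hypothetical; the numbers come from the constants in this file and
 * the 16KB page size mentioned in the comments.
 */
#if 0
static void
ls2f_geometry_check(void)
{
	/* one way of the 4-way, 64KB L1 holds 16KB, i.e. 512 32-byte lines */
	vsize_t waysize = LS2F_L1_SIZE / LS2F_CACHE_WAYS;

	KASSERT(waysize == 16UL * 1024UL);
	KASSERT(waysize / LS2F_CACHE_LINE == 512);
	/*
	 * The virtual index covers bits 5..13; with 16KB (2**14) pages
	 * these are all page-offset bits, identical in every mapping of
	 * a given physical page, so no virtual aliasing is possible.
	 */
	KASSERT(waysize <= PAGE_SIZE);
}
#endif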
/*
 * Writeback and invalidate all caches.
 */
void
Loongson2_SyncCache(struct cpu_info *ci)
{
	vaddr_t sva, eva;

	mips_sync();

	sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
	eva = sva + LS2F_L1_SIZE / LS2F_CACHE_WAYS;
	while (sva != eva) {
		cache(IndexInvalidate_I, 0, sva);
		sva += LS2F_CACHE_LINE;
	}

	sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
	eva = sva + LS2F_L1_SIZE / LS2F_CACHE_WAYS;
	while (sva != eva) {
		cache(IndexWBInvalidate_D, 0, sva);
		cache(IndexWBInvalidate_D, 1, sva);
		cache(IndexWBInvalidate_D, 2, sva);
		cache(IndexWBInvalidate_D, 3, sva);
		sva += LS2F_CACHE_LINE;
	}

	sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
	eva = sva + LS2F_L2_SIZE / LS2F_CACHE_WAYS;
	while (sva != eva) {
		cache(IndexWBInvalidate_S, 0, sva);
		cache(IndexWBInvalidate_S, 1, sva);
		cache(IndexWBInvalidate_S, 2, sva);
		cache(IndexWBInvalidate_S, 3, sva);
		sva += LS2F_CACHE_LINE;
	}
}

/*
 * Invalidate I$ for the given range.
 */
void
Loongson2_InvalidateICache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
{
	vaddr_t va, sva, eva;
	vsize_t sz;

	/* extend the range to integral cache lines */
	va = _va & ~(LS2F_CACHE_LINE - 1);
	sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

	sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
	/* keep only the index bits */
	sva |= va & ((1UL << 14) - 1);
	eva = sva + sz;
	while (sva != eva) {
		cache(IndexInvalidate_I, 0, sva);
		sva += LS2F_CACHE_LINE;
	}
}

/*
 * Register a given page for I$ invalidation.
 */
void
Loongson2_InvalidateICachePage(struct cpu_info *ci, vaddr_t va)
{
	/*
	 * Since the page size matches the I$ set size, and I$ maintenance
	 * operations always operate on all the sets, all we need to do here
	 * is remember there are postponed flushes.
	 */
	ci->ci_cachepending_l1i = 1;
}

/*
 * Perform postponed I$ invalidation.
 */
void
Loongson2_SyncICache(struct cpu_info *ci)
{
	vaddr_t sva, eva;

	if (ci->ci_cachepending_l1i != 0) {
		/* inline Loongson2_InvalidateICache(ci, 0, PAGE_SIZE); */
		sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
		eva = sva + PAGE_SIZE;
		while (sva != eva) {
			cache(IndexInvalidate_I, 0, sva);
			sva += LS2F_CACHE_LINE;
		}

		ci->ci_cachepending_l1i = 0;
	}
}

/*
 * Writeback D$ for the given page.
 *
 * The index for L1 is the low 14 bits of the virtual address. Since the
 * page size is 2**14 bytes, it is possible to access the page through
 * any valid address.
 */
void
Loongson2_SyncDCachePage(struct cpu_info *ci, vaddr_t va, paddr_t pa)
{
	vaddr_t sva, eva;

	mips_sync();

	sva = PHYS_TO_XKPHYS(pa, CCA_CACHED);
	eva = sva + PAGE_SIZE;
	for (va = sva; va != eva; va += LS2F_CACHE_LINE)
		cache(HitWBInvalidate_D, 0, va);
	for (va = sva; va != eva; va += LS2F_CACHE_LINE)
		cache(HitWBInvalidate_S, 0, va);
}

/*
 * Writeback D$ for the given range. Range is expected to be currently
 * mapped, allowing the use of `Hit' operations. This is less aggressive
 * than using `Index' operations.
 */

static __inline__ void
ls2f_hitwbinv_primary(vaddr_t va, vsize_t sz)
{
	vaddr_t eva;

	eva = va + sz;
	while (va != eva) {
		cache(HitWBInvalidate_D, 0, va);
		va += LS2F_CACHE_LINE;
	}
}

static __inline__ void
ls2f_hitwbinv_secondary(vaddr_t va, vsize_t sz)
{
	vaddr_t eva;

	eva = va + sz;
	while (va != eva) {
		cache(HitWBInvalidate_S, 0, va);
		va += LS2F_CACHE_LINE;
	}
}

void
Loongson2_HitSyncDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
{
	vaddr_t va;
	vsize_t sz;

	mips_sync();

	/* extend the range to integral cache lines */
	va = _va & ~(LS2F_CACHE_LINE - 1);
	sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

	ls2f_hitwbinv_primary(va, sz);
	ls2f_hitwbinv_secondary(va, sz);
}
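/*
 * Illustrative sketch, not part of the build: the line rounding used by
 * the `Hit' routines above and below.  The helper name and the example
 * values are hypothetical; for _va = 0x1234 and _sz = 0x10 the request
 * touches the lines at 0x1220 and 0x1240, so it is widened to
 * va = 0x1220, sz = 0x40.
 */
#if 0
static void
ls2f_round_to_lines(vaddr_t _va, vsize_t _sz, vaddr_t *vap, vsize_t *szp)
{
	/* round the start down and the end up to 32-byte line boundaries */
	*vap = _va & ~(LS2F_CACHE_LINE - 1);
	*szp = ((_va + _sz + LS2F_CACHE_LINE - 1) &
	    ~(LS2F_CACHE_LINE - 1)) - *vap;
}
#endif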
/*
 * Invalidate D$ for the given range. Range is expected to be currently
 * mapped, allowing the use of `Hit' operations. This is less aggressive
 * than using `Index' operations.
 */

static __inline__ void
ls2f_hitinv_primary(vaddr_t va, vsize_t sz)
{
	vaddr_t eva;

	eva = va + sz;
	while (va != eva) {
		cache(HitInvalidate_D, 0, va);
		va += LS2F_CACHE_LINE;
	}
}

static __inline__ void
ls2f_hitinv_secondary(vaddr_t va, vsize_t sz)
{
	vaddr_t eva;

	eva = va + sz;
	while (va != eva) {
		cache(HitInvalidate_S, 0, va);
		va += LS2F_CACHE_LINE;
	}
}

void
Loongson2_HitInvalidateDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
{
	vaddr_t va;
	vsize_t sz;

	/* extend the range to integral cache lines */
	va = _va & ~(LS2F_CACHE_LINE - 1);
	sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

	ls2f_hitinv_primary(va, sz);
	ls2f_hitinv_secondary(va, sz);

	mips_sync();
}

/*
 * Backend for bus_dmamap_sync(). Enforce coherency of the given range
 * by performing the necessary cache writeback and/or invalidate
 * operations.
 */
void
Loongson2_IOSyncDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz, int how)
{
	vaddr_t va;
	vsize_t sz;
	int partial_start, partial_end;

	/* extend the range to integral cache lines */
	va = _va & ~(LS2F_CACHE_LINE - 1);
	sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

	switch (how) {
	case CACHE_SYNC_R:
		/* writeback partial cachelines */
		if (((_va | _sz) & (LS2F_CACHE_LINE - 1)) != 0) {
			partial_start = va != _va;
			partial_end = va + sz != _va + _sz;
		} else {
			partial_start = partial_end = 0;
		}
		if (partial_start) {
			cache(HitWBInvalidate_D, 0, va);
			cache(HitWBInvalidate_S, 0, va);
			va += LS2F_CACHE_LINE;
			sz -= LS2F_CACHE_LINE;
		}
		if (sz != 0 && partial_end) {
			cache(HitWBInvalidate_D, 0, va + sz - LS2F_CACHE_LINE);
			cache(HitWBInvalidate_S, 0, va + sz - LS2F_CACHE_LINE);
			sz -= LS2F_CACHE_LINE;
		}
		ls2f_hitinv_primary(va, sz);
		ls2f_hitinv_secondary(va, sz);
		break;
	case CACHE_SYNC_X:
	case CACHE_SYNC_W:
		ls2f_hitwbinv_primary(va, sz);
		ls2f_hitwbinv_secondary(va, sz);
		break;
	}
}
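/*
 * Illustrative sketch, not part of the build: what the CACHE_SYNC_R case
 * above does for an unaligned DMA read buffer.  The helper name and the
 * values are hypothetical: with _va = 0x1010 and _sz = 0x60 the range is
 * widened to the four lines at 0x1000-0x107f.  The first and last lines
 * also hold bytes that do not belong to the buffer, so any dirty data
 * there is written back before the line is dropped, while the two
 * interior lines are simply invalidated so the CPU sees the freshly
 * DMA'd data.
 */
#if 0
static void
ls2f_sync_r_example(void)
{
	vaddr_t va = 0x1000;		/* _va rounded down to a line */
	vsize_t sz = 0x80;		/* _sz rounded up, four lines */

	/* partial first line (0x1000): writeback and invalidate */
	cache(HitWBInvalidate_D, 0, va);
	cache(HitWBInvalidate_S, 0, va);
	va += LS2F_CACHE_LINE;
	sz -= LS2F_CACHE_LINE;

	/* partial last line (0x1060): writeback and invalidate */
	cache(HitWBInvalidate_D, 0, va + sz - LS2F_CACHE_LINE);
	cache(HitWBInvalidate_S, 0, va + sz - LS2F_CACHE_LINE);
	sz -= LS2F_CACHE_LINE;

	/* interior lines (0x1020, 0x1040): invalidate without writeback */
	ls2f_hitinv_primary(va, sz);
	ls2f_hitinv_secondary(va, sz);
}
#endif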