/*
 * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
 * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 2000 SiByte, Inc.
 * Copyright (C) 2005 Thiemo Seufer
 *
 * Written by Justin Carlson of SiByte, Inc.
 *	 and Kip Walker of Broadcom Corp.
 *
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/smp.h>

#include <asm/io.h>
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_dma.h>

/*
 * Prefetch hint numbers pasted into the asm strings below.  Pass-1 SB1
 * parts fall back to the plain load (0) / store (1) hints; later parts
 * use the streamed variants (4 / 5).
 * NOTE(review): the specific pass-1 erratum motivating this is not
 * visible here -- confirm against the SB1 core errata list.
 */
#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
#define SB1_PREF_LOAD_STREAMED_HINT "0"
#define SB1_PREF_STORE_STREAMED_HINT "1"
#else
#define SB1_PREF_LOAD_STREAMED_HINT "4"
#define SB1_PREF_STORE_STREAMED_HINT "5"
#endif

/*
 * Zero PAGE_SIZE bytes at "page" on the CPU, one 32-byte cache line
 * (four 64-bit stores of $0) per loop iteration.
 *
 * With CONFIG_CPU_HAS_PREFETCH, the first loop runs the streamed-store
 * prefetch four lines ahead and therefore stops 128 bytes short of the
 * end (so it never prefetches past the page); the trailing loop then
 * zeroes the remaining 128 bytes without prefetching.  "bnel" is a
 * branch-likely: its delay slot instruction is annulled when the branch
 * falls through, which lets the loop keep the next prefetch/store in
 * the delay slot without running one extra past the exit.
 */
static inline void clear_page_cpu(void *page)
{
	unsigned char *addr = (unsigned char *) page;
	unsigned char *end = addr + PAGE_SIZE;

	/*
	 * JDCXXX - This should be bottlenecked by the write buffer, but these
	 * things tend to be mildly unpredictable...should check this on the
	 * performance model
	 *
	 * We prefetch 4 lines ahead.  We're also "cheating" slightly here...
	 * since we know we're on an SB1, we force the assembler to take
	 * 64-bit operands to speed things up
	 */
	__asm__ __volatile__(
	"	.set push		\n"
	"	.set mips4		\n"
	"	.set noreorder		\n"
#ifdef CONFIG_CPU_HAS_PREFETCH
	/* %0 biased up by 128 so all accesses use negative offsets */
	"	daddiu	%0, %0, 128	\n"
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ", -128(%0)	\n"
	/* Prefetch the first 4 lines */
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ", -96(%0)	\n"
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ", -64(%0)	\n"
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ", -32(%0)	\n"
	"1:	sd	$0, -128(%0)	\n"  /* Throw out a cacheline of 0's */
	"	sd	$0, -120(%0)	\n"
	"	sd	$0, -112(%0)	\n"
	"	sd	$0, -104(%0)	\n"
	"	daddiu	%0, %0, 32	\n"
	"	bnel	%0, %1, 1b	\n"
	/* delay slot: next prefetch, annulled on loop exit (branch-likely) */
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ", -32(%0)	\n"
	/* back up: last 128 bytes are zeroed by the loop below */
	"	daddiu	%0, %0, -128	\n"
#endif
	"	sd	$0, 0(%0)	\n"  /* Throw out a cacheline of 0's */
	/* this "1:" redefines the local label; "1b" below binds to it */
	"1:	sd	$0, 8(%0)	\n"
	"	sd	$0, 16(%0)	\n"
	"	sd	$0, 24(%0)	\n"
	"	daddiu	%0, %0, 32	\n"
	"	bnel	%0, %1, 1b	\n"
	/* delay slot: first store of the next line, annulled on exit */
	"	sd	$0, 0(%0)	\n"
	"	.set pop		\n"
	: "+r" (addr)
	: "r" (end)
	: "memory");
}

/*
 * Copy PAGE_SIZE bytes from "from" to "to" on the CPU, one 32-byte
 * cache line per iteration.  %0 walks the source, %1 the destination;
 * "end" is the source end pointer.
 *
 * 64-bit kernels move a line with four ld/sd pairs ($8-$11); 32-bit
 * kernels use eight lw/sw pairs ($2,$3,$6-$11) -- the clobber lists at
 * the bottom match each variant.  The main copy loop is software-
 * pipelined: the first load of the NEXT line sits in the bnel delay
 * slot (annulled on exit), so the load labelled "1:" is the second
 * load of each line.
 */
static inline void copy_page_cpu(void *to, void *from)
{
	unsigned char *src = (unsigned char *)from;
	unsigned char *dst = (unsigned char *)to;
	unsigned char *end = src + PAGE_SIZE;

	/*
	 * The pref's used here are using "streaming" hints, which cause the
	 * copied data to be kicked out of the cache sooner.  A page copy often
	 * ends up copying a lot more data than is commonly used, so this seems
	 * to make sense in terms of reducing cache pollution, but I've no real
	 * performance data to back this up
	 */
	__asm__ __volatile__(
	"	.set push		\n"
	"	.set mips4		\n"
	"	.set noreorder		\n"
#ifdef CONFIG_CPU_HAS_PREFETCH
	/* bias both pointers by 128; prefetch 4 lines ahead on each side */
	"	daddiu	%0, %0, 128	\n"
	"	daddiu	%1, %1, 128	\n"
	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", -128(%0)\n"
	/* Prefetch the first 4 lines */
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ", -128(%1)\n"
	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", -96(%0)\n"
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ", -96(%1)\n"
	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", -64(%0)\n"
	"	pref	" SB1_PREF_STORE_STREAMED_HINT ", -64(%1)\n"
	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", -32(%0)\n"
	"1:	pref	" SB1_PREF_STORE_STREAMED_HINT ", -32(%1)\n"
# ifdef CONFIG_64BIT
	"	ld	$8, -128(%0)	\n"  /* Block copy a cacheline */
	"	ld	$9, -120(%0)	\n"
	"	ld	$10, -112(%0)	\n"
	"	ld	$11, -104(%0)	\n"
	"	sd	$8, -128(%1)	\n"
	"	sd	$9, -120(%1)	\n"
	"	sd	$10, -112(%1)	\n"
	"	sd	$11, -104(%1)	\n"
# else
	"	lw	$2, -128(%0)	\n"  /* Block copy a cacheline */
	"	lw	$3, -124(%0)	\n"
	"	lw	$6, -120(%0)	\n"
	"	lw	$7, -116(%0)	\n"
	"	lw	$8, -112(%0)	\n"
	"	lw	$9, -108(%0)	\n"
	"	lw	$10, -104(%0)	\n"
	"	lw	$11, -100(%0)	\n"
	"	sw	$2, -128(%1)	\n"
	"	sw	$3, -124(%1)	\n"
	"	sw	$6, -120(%1)	\n"
	"	sw	$7, -116(%1)	\n"
	"	sw	$8, -112(%1)	\n"
	"	sw	$9, -108(%1)	\n"
	"	sw	$10, -104(%1)	\n"
	"	sw	$11, -100(%1)	\n"
# endif
	"	daddiu	%0, %0, 32	\n"
	"	daddiu	%1, %1, 32	\n"
	"	bnel	%0, %2, 1b	\n"
	/* delay slot: prefetch next source line, annulled on loop exit */
	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", -32(%0)\n"
	/* back up: final 128 bytes are copied by the loop below */
	"	daddiu	%0, %0, -128	\n"
	"	daddiu	%1, %1, -128	\n"
#endif
#ifdef CONFIG_64BIT
	"	ld	$8, 0(%0)	\n"  /* Block copy a cacheline */
	/* "1:" sits on the SECOND load; the first is pipelined below */
	"1:	ld	$9, 8(%0)	\n"
	"	ld	$10, 16(%0)	\n"
	"	ld	$11, 24(%0)	\n"
	"	sd	$8, 0(%1)	\n"
	"	sd	$9, 8(%1)	\n"
	"	sd	$10, 16(%1)	\n"
	"	sd	$11, 24(%1)	\n"
#else
	"	lw	$2, 0(%0)	\n"  /* Block copy a cacheline */
	"1:	lw	$3, 4(%0)	\n"
	"	lw	$6, 8(%0)	\n"
	"	lw	$7, 12(%0)	\n"
	"	lw	$8, 16(%0)	\n"
	"	lw	$9, 20(%0)	\n"
	"	lw	$10, 24(%0)	\n"
	"	lw	$11, 28(%0)	\n"
	"	sw	$2, 0(%1)	\n"
	"	sw	$3, 4(%1)	\n"
	"	sw	$6, 8(%1)	\n"
	"	sw	$7, 12(%1)	\n"
	"	sw	$8, 16(%1)	\n"
	"	sw	$9, 20(%1)	\n"
	"	sw	$10, 24(%1)	\n"
	"	sw	$11, 28(%1)	\n"
#endif
	"	daddiu	%0, %0, 32	\n"
	"	daddiu	%1, %1, 32	\n"
	"	bnel	%0, %2, 1b	\n"
	/* delay slot: first load of the next line, annulled on exit */
#ifdef CONFIG_64BIT
	"	ld	$8, 0(%0)	\n"
#else
	"	lw	$2, 0(%0)	\n"
#endif
	"	.set pop		\n"
	: "+r" (src), "+r" (dst)
	: "r" (end)
#ifdef CONFIG_64BIT
	: "$8","$9","$10","$11","memory");
#else
	: "$2","$3","$6","$7","$8","$9","$10","$11","memory");
#endif
}


#ifdef CONFIG_SIBYTE_DMA_PAGEOPS

/*
 * Pad descriptors to cacheline, since each is exclusively owned by a
 * particular CPU.
203 */ 204typedef struct dmadscr_s { 205 u64 dscr_a; 206 u64 dscr_b; 207 u64 pad_a; 208 u64 pad_b; 209} dmadscr_t; 210 211static dmadscr_t page_descr[DM_NUM_CHANNELS] 212 __attribute__((aligned(SMP_CACHE_BYTES))); 213 214void sb1_dma_init(void) 215{ 216 int i; 217 218 for (i = 0; i < DM_NUM_CHANNELS; i++) { 219 const u64 base_val = CPHYSADDR(&page_descr[i]) | 220 V_DM_DSCR_BASE_RINGSZ(1); 221 void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE)); 222 223 __raw_writeq(base_val, base_reg); 224 __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg); 225 __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg); 226 } 227} 228 229void clear_page(void *page) 230{ 231 u64 to_phys = CPHYSADDR(page); 232 unsigned int cpu = smp_processor_id(); 233 234 /* if the page is not in KSEG0, use old way */ 235 if ((long)KSEGX(page) != (long)CKSEG0) 236 return clear_page_cpu(page); 237 238 page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM | 239 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT; 240 page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); 241 __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); 242 243 /* 244 * Don't really want to do it this way, but there's no 245 * reliable way to delay completion detection. 
246 */ 247 while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) 248 & M_DM_DSCR_BASE_INTERRUPT)) 249 ; 250 __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); 251} 252 253void copy_page(void *to, void *from) 254{ 255 u64 from_phys = CPHYSADDR(from); 256 u64 to_phys = CPHYSADDR(to); 257 unsigned int cpu = smp_processor_id(); 258 259 /* if any page is not in KSEG0, use old way */ 260 if ((long)KSEGX(to) != (long)CKSEG0 261 || (long)KSEGX(from) != (long)CKSEG0) 262 return copy_page_cpu(to, from); 263 264 page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST | 265 M_DM_DSCRA_INTERRUPT; 266 page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); 267 __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); 268 269 /* 270 * Don't really want to do it this way, but there's no 271 * reliable way to delay completion detection. 272 */ 273 while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) 274 & M_DM_DSCR_BASE_INTERRUPT)) 275 ; 276 __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); 277} 278 279#else /* !CONFIG_SIBYTE_DMA_PAGEOPS */ 280 281void clear_page(void *page) 282{ 283 return clear_page_cpu(page); 284} 285 286void copy_page(void *to, void *from) 287{ 288 return copy_page_cpu(to, from); 289} 290 291#endif /* !CONFIG_SIBYTE_DMA_PAGEOPS */ 292 293EXPORT_SYMBOL(clear_page); 294EXPORT_SYMBOL(copy_page); 295