/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
 * Copyright (C) 2007 Maciej W. Rozycki
 * Copyright (C) 2008 Thiemo Seufer
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/proc_fs.h>

#include <asm/bugs.h>
#include <asm/cacheops.h>
#include <asm/inst.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prefetch.h>
#include <asm/system.h>
#include <asm/bootinfo.h>
#include <asm/mipsregs.h>
#include <asm/mmu_context.h>
#include <asm/cpu.h>
#include <asm/war.h>

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_dma.h>
#endif

#include <asm/uasm.h>

/*
 * Registers used in the assembled routines.
 *
 * NOTE(review): "AT" here is register $2 (v0), not the assembler
 * temporary $1.  It is only used below as a scratch base register for
 * the R4600 cacheop-erratum dummy load — confirm this naming is
 * intentional before changing it.
 */
#define ZERO 0
#define AT 2
#define A0 4
#define A1 5
#define A2 6
#define T0 8
#define T1 9
#define T2 10
#define T3 11
#define T9 25
#define RA 31

/* Handle labels (which must be positive integers). */
enum label_id {
	label_clear_nopref = 1,
	label_clear_pref,
	label_copy_nopref,
	label_copy_pref_both,
	label_copy_pref_store,
};

/* Generate the uasm_l_* label helpers for each label above. */
UASM_L_LA(_clear_nopref)
UASM_L_LA(_clear_pref)
UASM_L_LA(_copy_nopref)
UASM_L_LA(_copy_pref_both)
UASM_L_LA(_copy_pref_store)

/* We need one branch and therefore one relocation per target label.
 */
static struct uasm_label __cpuinitdata labels[5];
static struct uasm_reloc __cpuinitdata relocs[5];

/* R4600 revision checks, used to enable hit-cacheop erratum workarounds. */
#define cpu_is_r4600_v1_x()	((read_c0_prid() & 0xfffffff0) == 0x00002010)
#define cpu_is_r4600_v2_x()	((read_c0_prid() & 0xfffffff0) == 0x00002020)

/*
 * Maximum sizes:
 *
 * R4000 128 bytes S-cache:		0x058 bytes
 * R4600 v1.7:				0x05c bytes
 * R4600 v2.0:				0x060 bytes
 * With prefetching, 16 word strides	0x120 bytes
 */

/* Buffer the synthesized clear_page handler is assembled into. */
static u32 clear_page_array[0x120 / 4];

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
/* With DMA page ops the CPU fallback keeps a distinct name. */
void clear_page_cpu(void *page) __attribute__((alias("clear_page_array")));
#else
void clear_page(void *page) __attribute__((alias("clear_page_array")));
#endif

EXPORT_SYMBOL(clear_page);

/*
 * Maximum sizes:
 *
 * R4000 128 bytes S-cache:		0x11c bytes
 * R4600 v1.7:				0x080 bytes
 * R4600 v2.0:				0x07c bytes
 * With prefetching, 16 word strides	0x540 bytes
 */
/* Buffer the synthesized copy_page handler is assembled into. */
static u32 copy_page_array[0x540 / 4];

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
void
copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array")));
#else
void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
#endif

EXPORT_SYMBOL(copy_page);


/* Prefetch distances, in bytes, ahead of the current store/load offset. */
static int pref_bias_clear_store __cpuinitdata;
static int pref_bias_copy_load __cpuinitdata;
static int pref_bias_copy_store __cpuinitdata;

/* PREF hint codes used for source loads and destination stores. */
static u32 pref_src_mode __cpuinitdata;
static u32 pref_dst_mode __cpuinitdata;

/* Bytes moved per emitted store: 8 with 64-bit registers, else 4. */
static int clear_word_size __cpuinitdata;
static int copy_word_size __cpuinitdata;

/* Half the size, in bytes, of one unrolled loop body. */
static int half_clear_loop_size __cpuinitdata;
static int half_copy_loop_size __cpuinitdata;

static int cache_line_size __cpuinitdata;
#define cache_line_mask() (cache_line_size - 1)

/*
 * Emit code computing reg1 = reg2 + off for an unsigned offset.
 * Uses T9 as scratch when off doesn't fit a 16-bit immediate, and
 * avoids daddiu with a non-zero immediate on CPUs suffering from the
 * R4000/R4400 daddi erratum (DADDI_WAR + r4k_daddiu_bug()).
 */
static inline void __cpuinit
pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
{
	if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
		if (off > 0x7fff) {
			uasm_i_lui(buf, T9, uasm_rel_hi(off));
			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
		} else
			uasm_i_addiu(buf, T9, ZERO, off);
		/* daddu is unaffected by the erratum. */
		uasm_i_daddu(buf, reg1, reg2, T9);
	} else {
		if (off > 0x7fff) {
			uasm_i_lui(buf, T9, uasm_rel_hi(off));
			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
			UASM_i_ADDU(buf, reg1, reg2, T9);
		} else
			UASM_i_ADDIU(buf, reg1, reg2, off);
	}
}

/*
 * Select per-CPU-type prefetch hints/distances and the loop unrolling
 * factors used when synthesizing the clear/copy handlers.
 */
static void __cpuinit set_prefetch_parameters(void)
{
	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)
		clear_word_size = 8;
	else
		clear_word_size = 4;

	if (cpu_has_64bit_gp_regs)
		copy_word_size = 8;
	else
		copy_word_size = 4;

	/*
	 * The pref's used here are using "streaming" hints, which cause the
	 * copied data to be kicked out of the cache sooner.  A page copy often
	 * ends up copying a lot more data than is commonly used, so this seems
	 * to make sense in terms of reducing cache pollution, but I've no real
	 * performance data to back this up.
	 */
	if (cpu_has_prefetch) {
		cache_line_size = cpu_dcache_line_size();
		switch (current_cpu_type()) {
		case CPU_R5500:
		case CPU_TX49XX:
			/* These processors only support the Pref_Load. */
			pref_bias_copy_load = 256;
			break;

		case CPU_RM9000:

		case CPU_R10000:
		case CPU_R12000:
		case CPU_R14000:
			/*
			 * Those values have been experimentally tuned for an
			 * Origin 200.
			 */
			pref_bias_clear_store = 512;
			pref_bias_copy_load = 256;
			pref_bias_copy_store = 256;
			pref_src_mode = Pref_LoadStreamed;
			pref_dst_mode = Pref_StoreStreamed;
			break;

		case CPU_SB1:
		case CPU_SB1A:
			pref_bias_clear_store = 128;
			pref_bias_copy_load = 128;
			pref_bias_copy_store = 128;
			/*
			 * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed
			 * hints are broken.
			 */
			if (current_cpu_type() == CPU_SB1 &&
			    (current_cpu_data.processor_id & 0xff) < 0x02) {
				pref_src_mode = Pref_Load;
				pref_dst_mode = Pref_Store;
			} else {
				pref_src_mode = Pref_LoadStreamed;
				pref_dst_mode = Pref_StoreStreamed;
			}
			break;

		default:
			pref_bias_clear_store = 128;
			pref_bias_copy_load = 256;
			pref_bias_copy_store = 128;
			pref_src_mode = Pref_LoadStreamed;
			pref_dst_mode = Pref_PrepareForStore;
			break;
		}
	} else {
		/* No prefetch: line size may come from the S-cache instead. */
		if (cpu_has_cache_cdex_s)
			cache_line_size = cpu_scache_line_size();
		else if (cpu_has_cache_cdex_p)
			cache_line_size = cpu_dcache_line_size();
	}
	/*
	 * Too much unrolling will overflow the available space in
	 * clear_space_array / copy_page_array.
	 */
	half_clear_loop_size = min(16 * clear_word_size,
				   max(cache_line_size >> 1,
				       4 * clear_word_size));
	half_copy_loop_size = min(16 * copy_word_size,
				  max(cache_line_size >> 1,
				      4 * copy_word_size));
}

/* Emit one zero store (sd/sw) of $0 to off(A0). */
static void __cpuinit build_clear_store(u32 **buf, int off)
{
	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
		uasm_i_sd(buf, ZERO, off, A0);
	} else {
		uasm_i_sw(buf, ZERO, off, A0);
	}
}

/*
 * Emit, once per cache line, either a store prefetch or (on CPUs
 * without prefetch but with create-dirty-exclusive cacheops) the
 * corresponding cache instruction, with R4600 erratum workarounds.
 */
static inline void __cpuinit build_clear_pref(u32 **buf, int off)
{
	/* Only act on cache-line-aligned offsets. */
	if (off & cache_line_mask())
		return;

	if (pref_bias_clear_store) {
		uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
			    A0);
	} else if (cache_line_size == (half_clear_loop_size << 1)) {
		if (cpu_has_cache_cdex_s) {
			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
		} else if (cpu_has_cache_cdex_p) {
			if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
				/* Erratum workaround: pad before the cacheop. */
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
			}

			if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
				/* Dummy load via AT (set up by the caller). */
				uasm_i_lw(buf, ZERO, ZERO, AT);

			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
		}
	}
}

/*
 * Synthesize the clear_page handler into clear_page_array.
 * Generated code contract: A0 = page address; A2 is used internally as
 * the loop end pointer.  Emits a prefetched main loop and, when a
 * store-prefetch bias is in use, a non-prefetching tail loop for the
 * final pref_bias_clear_store bytes.
 */
void __cpuinit build_clear_page(void)
{
	int off;
	u32 *buf = (u32 *)&clear_page_array;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	int i;

	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	set_prefetch_parameters();

	/*
	 * This algorithm makes the following assumptions:
	 *   - The prefetch bias is a multiple of 2 words.
	 *   - The prefetch bias is less than one page.
	 */
	BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
	BUG_ON(PAGE_SIZE < pref_bias_clear_store);

	/* A2 = end of the prefetched region (page end minus bias). */
	off = PAGE_SIZE - pref_bias_clear_store;
	if (off > 0xffff || !pref_bias_clear_store)
		pg_addiu(&buf, A2, A0, off);
	else
		/* Page-aligned A0 makes ori equivalent to an add here. */
		uasm_i_ori(&buf, A2, A0, off);

	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
		/* AT = 0xa0000000; base for the erratum dummy load. */
		uasm_i_lui(&buf, AT, 0xa000);

	/* Warm up: prefetch up to 8 lines ahead of the loop start. */
	off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
				* cache_line_size : 0;
	while (off) {
		build_clear_pref(&buf, -off);
		off -= cache_line_size;
	}
	uasm_l_clear_pref(&l, buf);
	/* First half of the unrolled body: positive offsets from A0. */
	do {
		build_clear_pref(&buf, off);
		build_clear_store(&buf, off);
		off += clear_word_size;
	} while (off < half_clear_loop_size);
	/* Advance A0 past the full body, then store at negative offsets. */
	pg_addiu(&buf, A0, A0, 2 * off);
	off = -off;
	do {
		build_clear_pref(&buf, off);
		if (off == -clear_word_size)
			/* Loop branch; the final store fills the delay slot. */
			uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
		build_clear_store(&buf, off);
		off += clear_word_size;
	} while (off < 0);

	if (pref_bias_clear_store) {
		/* Tail loop without prefetching for the last bias bytes. */
		pg_addiu(&buf, A2, A0, pref_bias_clear_store);
		uasm_l_clear_nopref(&l, buf);
		off = 0;
		do {
			build_clear_store(&buf, off);
			off += clear_word_size;
		} while (off < half_clear_loop_size);
		pg_addiu(&buf, A0, A0, 2 * off);
		off = -off;
		do {
			if (off == -clear_word_size)
				uasm_il_bne(&buf, &r, A0, A2,
					    label_clear_nopref);
			build_clear_store(&buf, off);
			off += clear_word_size;
		} while (off < 0);
	}

	uasm_i_jr(&buf, RA);
	uasm_i_nop(&buf);	/* jr delay slot */

	BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array));

	uasm_resolve_relocs(relocs, labels);

	pr_debug("Synthesized clear page handler (%u instructions).\n",
		 (u32)(buf - clear_page_array));

	/* Dump the generated code in assembler-pasteable form. */
	pr_debug("\t.set push\n");
	pr_debug("\t.set noreorder\n");
	for (i = 0; i < (buf - clear_page_array); i++)
		pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
	pr_debug("\t.set pop\n");
}

/* Emit one load (ld/lw) from off(A1) into reg. */
static void __cpuinit build_copy_load(u32 **buf, int reg, int off)
{
	if (cpu_has_64bit_gp_regs) {
		uasm_i_ld(buf, reg, off, A1);
	} else {
		uasm_i_lw(buf, reg, off, A1);
	}
}

/* Emit one store (sd/sw) of reg to off(A0). */
static void __cpuinit build_copy_store(u32 **buf, int reg, int off)
{
	if (cpu_has_64bit_gp_regs) {
		uasm_i_sd(buf, reg, off, A0);
	} else {
		uasm_i_sw(buf, reg, off, A0);
	}
}

/* Emit, once per cache line, a source-load prefetch (A1 relative). */
static inline void build_copy_load_pref(u32 **buf, int off)
{
	if (off & cache_line_mask())
		return;

	if (pref_bias_copy_load)
		uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
}

/*
 * Emit, once per cache line, a destination-store prefetch or, on CPUs
 * without prefetch, a create-dirty-exclusive cacheop (A0 relative),
 * with R4600 erratum workarounds.
 */
static inline void build_copy_store_pref(u32 **buf, int off)
{
	if (off & cache_line_mask())
		return;

	if (pref_bias_copy_store) {
		uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
			    A0);
	} else if (cache_line_size == (half_copy_loop_size << 1)) {
		if (cpu_has_cache_cdex_s) {
			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
		} else if (cpu_has_cache_cdex_p) {
			if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
				/* Erratum workaround: pad before the cacheop. */
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
			}

			if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
				/* Dummy load via AT (set up by the caller). */
				uasm_i_lw(buf, ZERO, ZERO, AT);

			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
		}
	}
}

/*
 * Synthesize the copy_page handler into copy_page_array.
 * Generated code contract: A0 = destination, A1 = source; A2 is used
 * internally as the loop end pointer; T0-T3 hold data in flight.
 * Three loop flavors are emitted as needed: both-prefetched, store-only
 * prefetched, and unprefetched, covering the page in that order.
 */
void __cpuinit build_copy_page(void)
{
	int off;
	u32 *buf = (u32 *)&copy_page_array;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	int i;

	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	set_prefetch_parameters();

	/*
	 * This algorithm makes the following assumptions:
	 *   - All prefetch biases are multiples of 8 words.
	 *   - The prefetch biases are less than one page.
	 *   - The store prefetch bias isn't greater than the load
	 *     prefetch bias.
	 */
	BUG_ON(pref_bias_copy_load % (8 * copy_word_size));
	BUG_ON(pref_bias_copy_store % (8 * copy_word_size));
	BUG_ON(PAGE_SIZE < pref_bias_copy_load);
	BUG_ON(pref_bias_copy_store > pref_bias_copy_load);

	/* A2 = end of the load-prefetched region. */
	off = PAGE_SIZE - pref_bias_copy_load;
	if (off > 0xffff || !pref_bias_copy_load)
		pg_addiu(&buf, A2, A0, off);
	else
		/* Page-aligned A0 makes ori equivalent to an add here. */
		uasm_i_ori(&buf, A2, A0, off);

	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
		/* AT = 0xa0000000; base for the erratum dummy load. */
		uasm_i_lui(&buf, AT, 0xa000);

	/* Warm up: prefetch loads up to 8 lines ahead. */
	off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) *
				cache_line_size : 0;
	while (off) {
		build_copy_load_pref(&buf, -off);
		off -= cache_line_size;
	}
	/* Warm up: prefetch stores up to 8 lines ahead. */
	off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) *
				cache_line_size : 0;
	while (off) {
		build_copy_store_pref(&buf, -off);
		off -= cache_line_size;
	}
	uasm_l_copy_pref_both(&l, buf);
	/* Main loop: both load and store prefetching, 4-word unroll. */
	do {
		build_copy_load_pref(&buf, off);
		build_copy_load(&buf, T0, off);
		build_copy_load_pref(&buf, off + copy_word_size);
		build_copy_load(&buf, T1, off + copy_word_size);
		build_copy_load_pref(&buf, off + 2 * copy_word_size);
		build_copy_load(&buf, T2, off + 2 * copy_word_size);
		build_copy_load_pref(&buf, off + 3 * copy_word_size);
		build_copy_load(&buf, T3, off + 3 * copy_word_size);
		build_copy_store_pref(&buf, off);
		build_copy_store(&buf, T0, off);
		build_copy_store_pref(&buf, off + copy_word_size);
		build_copy_store(&buf, T1, off + copy_word_size);
		build_copy_store_pref(&buf, off + 2 * copy_word_size);
		build_copy_store(&buf, T2, off + 2 * copy_word_size);
		build_copy_store_pref(&buf, off + 3 * copy_word_size);
		build_copy_store(&buf, T3, off + 3 * copy_word_size);
		off += 4 * copy_word_size;
	} while (off < half_copy_loop_size);
	/* Advance pointers past the body, work at negative offsets. */
	pg_addiu(&buf, A1, A1, 2 * off);
	pg_addiu(&buf, A0, A0, 2 * off);
	off = -off;
	do {
		build_copy_load_pref(&buf, off);
		build_copy_load(&buf, T0, off);
		build_copy_load_pref(&buf, off + copy_word_size);
		build_copy_load(&buf, T1, off + copy_word_size);
		build_copy_load_pref(&buf, off + 2 * copy_word_size);
		build_copy_load(&buf, T2, off + 2 * copy_word_size);
		build_copy_load_pref(&buf, off + 3 * copy_word_size);
		build_copy_load(&buf, T3, off + 3 * copy_word_size);
		build_copy_store_pref(&buf, off);
		build_copy_store(&buf, T0, off);
		build_copy_store_pref(&buf, off + copy_word_size);
		build_copy_store(&buf, T1, off + copy_word_size);
		build_copy_store_pref(&buf, off + 2 * copy_word_size);
		build_copy_store(&buf, T2, off + 2 * copy_word_size);
		build_copy_store_pref(&buf, off + 3 * copy_word_size);
		if (off == -(4 * copy_word_size))
			/* Loop branch; final store fills the delay slot. */
			uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both);
		build_copy_store(&buf, T3, off + 3 * copy_word_size);
		off += 4 * copy_word_size;
	} while (off < 0);

	if (pref_bias_copy_load - pref_bias_copy_store) {
		/* Region past load prefetching: store prefetch only. */
		pg_addiu(&buf, A2, A0,
			 pref_bias_copy_load - pref_bias_copy_store);
		uasm_l_copy_pref_store(&l, buf);
		off = 0;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store_pref(&buf, off);
			build_copy_store(&buf, T0, off);
			build_copy_store_pref(&buf, off + copy_word_size);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store_pref(&buf, off + 2 * copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			build_copy_store_pref(&buf, off + 3 * copy_word_size);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < half_copy_loop_size);
		pg_addiu(&buf, A1, A1, 2 * off);
		pg_addiu(&buf, A0, A0, 2 * off);
		off = -off;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store_pref(&buf, off);
			build_copy_store(&buf, T0, off);
			build_copy_store_pref(&buf, off + copy_word_size);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store_pref(&buf, off + 2 * copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			build_copy_store_pref(&buf, off + 3 * copy_word_size);
			if (off == -(4 * copy_word_size))
				uasm_il_bne(&buf, &r, A2, A0,
					    label_copy_pref_store);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < 0);
	}

	if (pref_bias_copy_store) {
		/* Final region: no prefetching at all. */
		pg_addiu(&buf, A2, A0, pref_bias_copy_store);
		uasm_l_copy_nopref(&l, buf);
		off = 0;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store(&buf, T0, off);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < half_copy_loop_size);
		pg_addiu(&buf, A1, A1, 2 * off);
		pg_addiu(&buf, A0, A0, 2 * off);
		off = -off;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store(&buf, T0, off);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			if (off == -(4 * copy_word_size))
				uasm_il_bne(&buf, &r, A2, A0,
					    label_copy_nopref);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < 0);
	}

	uasm_i_jr(&buf, RA);
	uasm_i_nop(&buf);	/* jr delay slot */

	BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array));

	uasm_resolve_relocs(relocs, labels);

	pr_debug("Synthesized copy page handler (%u instructions).\n",
		 (u32)(buf - copy_page_array));

	/* Dump the generated code in assembler-pasteable form. */
	pr_debug("\t.set push\n");
	pr_debug("\t.set noreorder\n");
	for (i = 0; i < (buf - copy_page_array); i++)
		pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
	pr_debug("\t.set pop\n");
}

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS

/*
 * Pad descriptors to cacheline, since each is exclusively owned by a
 * particular CPU.
 */
struct dmadscr {
	u64 dscr_a;	/* descriptor word A: dest physaddr + flags */
	u64 dscr_b;	/* descriptor word B: source physaddr/length */
	u64 pad_a;
	u64 pad_b;
} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS];

/*
 * Program one data mover channel per CPU: point its descriptor ring at
 * this CPU's page_descr entry, reset and then enable the channel.
 */
void sb1_dma_init(void)
{
	int i;

	for (i = 0; i < DM_NUM_CHANNELS; i++) {
		const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) |
				     V_DM_DSCR_BASE_RINGSZ(1);
		void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));

		/* Order matters: set base, reset, then enable. */
		__raw_writeq(base_val, base_reg);
		__raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
		__raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
	}
}

/*
 * Zero a page using the SiByte data mover, falling back to the
 * synthesized CPU routine for pages outside KSEG0.  Spins until the
 * engine signals completion.
 *
 * NOTE(review): smp_processor_id() is used without explicitly
 * disabling preemption here — presumably callers guarantee the task
 * cannot migrate mid-operation; confirm.
 */
void clear_page(void *page)
{
	u64 to_phys = CPHYSADDR((unsigned long)page);
	unsigned int cpu = smp_processor_id();

	/* if the page is not in KSEG0, use old way */
	if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
		return clear_page_cpu(page);

	/* Build a zero-memory descriptor and kick the channel. */
	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
				 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
	page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

	/*
	 * Don't really want to do it this way, but there's no
	 * reliable way to delay completion detection.
	 */
	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
		 & M_DM_DSCR_BASE_INTERRUPT))
		;
	/* Read DSCR_BASE to acknowledge/clear the completion status. */
	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

/*
 * Copy a page using the SiByte data mover, falling back to the
 * synthesized CPU routine when either page is outside KSEG0.  Spins
 * until the engine signals completion (see NOTE on clear_page above
 * regarding smp_processor_id()).
 */
void copy_page(void *to, void *from)
{
	u64 from_phys = CPHYSADDR((unsigned long)from);
	u64 to_phys = CPHYSADDR((unsigned long)to);
	unsigned int cpu = smp_processor_id();

	/* if any page is not in KSEG0, use old way */
	if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
	    || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
		return copy_page_cpu(to, from);

	/* Build a copy descriptor and kick the channel. */
	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
				 M_DM_DSCRA_INTERRUPT;
	page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

	/*
	 * Don't really want to do it this way, but there's no
	 * reliable way to delay completion detection.
	 */
	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
		 & M_DM_DSCR_BASE_INTERRUPT))
		;
	/* Read DSCR_BASE to acknowledge/clear the completion status. */
	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */