1/* 2 * This file is subject to the terms and conditions of the GNU General Public 3 * License. See the file "COPYING" in the main directory of this archive 4 * for more details. 5 * 6 * Synthesize TLB refill handlers at runtime. 7 * 8 * Copyright (C) 2004,2005,2006 by Thiemo Seufer 9 * Copyright (C) 2005 Maciej W. Rozycki 10 * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) 11 * 12 * ... and the days got worse and worse and now you see 13 * I've gone completly out of my mind. 14 * 15 * They're coming to take me a away haha 16 * they're coming to take me a away hoho hihi haha 17 * to the funny farm where code is beautiful all the time ... 18 * 19 * (Condolences to Napoleon XIV) 20 */ 21 22#include <stdarg.h> 23 24#include <linux/mm.h> 25#include <linux/kernel.h> 26#include <linux/types.h> 27#include <linux/string.h> 28#include <linux/init.h> 29 30#include <asm/pgtable.h> 31#include <asm/cacheflush.h> 32#include <asm/mmu_context.h> 33#include <asm/inst.h> 34#include <asm/elf.h> 35#include <asm/smp.h> 36#include <asm/war.h> 37 38static __init int __maybe_unused r45k_bvahwbug(void) 39{ 40 return 0; 41} 42 43static __init int __maybe_unused r4k_250MHZhwbug(void) 44{ 45 return 0; 46} 47 48static __init int __maybe_unused bcm1250_m3_war(void) 49{ 50 return BCM1250_M3_WAR; 51} 52 53static __init int __maybe_unused r10000_llsc_war(void) 54{ 55 return R10000_LLSC_WAR; 56} 57 58/* 59 * A little micro-assembler, intended for TLB refill handler 60 * synthesizing. It is intentionally kept simple, does only support 61 * a subset of instructions, and does not try to hide pipeline effects 62 * like branch delay slots. 63 */ 64 65enum fields 66{ 67 RS = 0x001, 68 RT = 0x002, 69 RD = 0x004, 70 RE = 0x008, 71 SIMM = 0x010, 72 UIMM = 0x020, 73 BIMM = 0x040, 74 JIMM = 0x080, 75 FUNC = 0x100, 76 SET = 0x200 77}; 78 79#define OP_MASK 0x2f 80#define OP_SH 26 81#define RS_MASK 0x1f 82#define RS_SH 21 83#define RT_MASK 0x1f 84#define RT_SH 16 85#define RD_MASK 0x1f 86#define RD_SH 11 87#define RE_MASK 0x1f 88#define RE_SH 6 89#define IMM_MASK 0xffff 90#define IMM_SH 0 91#define JIMM_MASK 0x3ffffff 92#define JIMM_SH 0 93#define FUNC_MASK 0x2f 94#define FUNC_SH 0 95#define SET_MASK 0x7 96#define SET_SH 0 97 98enum opcode { 99 insn_invalid, 100 insn_addu, insn_addiu, insn_and, insn_andi, insn_beq, 101 insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl, 102 insn_bne, insn_daddu, insn_daddiu, insn_dmfc0, insn_dmtc0, 103 insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, 104 insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr, insn_ld, 105 insn_ll, insn_lld, insn_lui, insn_lw, insn_mfc0, insn_mtc0, 106 insn_ori, insn_rfe, insn_sc, insn_scd, insn_sd, insn_sll, 107 insn_sra, insn_srl, insn_subu, insn_sw, insn_tlbp, insn_tlbwi, 108 insn_tlbwr, insn_xor, insn_xori 109}; 110 111struct insn { 112 enum opcode opcode; 113 u32 match; 114 enum fields fields; 115}; 116 117/* This macro sets the non-variable bits of an instruction. */ 118#define M(a, b, c, d, e, f) \ 119 ((a) << OP_SH \ 120 | (b) << RS_SH \ 121 | (c) << RT_SH \ 122 | (d) << RD_SH \ 123 | (e) << RE_SH \ 124 | (f) << FUNC_SH) 125 126static __initdata struct insn insn_table[] = { 127 { insn_addiu, M(addiu_op,0,0,0,0,0), RS | RT | SIMM }, 128 { insn_addu, M(spec_op,0,0,0,0,addu_op), RS | RT | RD }, 129 { insn_and, M(spec_op,0,0,0,0,and_op), RS | RT | RD }, 130 { insn_andi, M(andi_op,0,0,0,0,0), RS | RT | UIMM }, 131 { insn_beq, M(beq_op,0,0,0,0,0), RS | RT | BIMM }, 132 { insn_beql, M(beql_op,0,0,0,0,0), RS | RT | BIMM }, 133 { insn_bgez, M(bcond_op,0,bgez_op,0,0,0), RS | BIMM }, 134 { insn_bgezl, M(bcond_op,0,bgezl_op,0,0,0), RS | BIMM }, 135 { insn_bltz, M(bcond_op,0,bltz_op,0,0,0), RS | BIMM }, 136 { insn_bltzl, M(bcond_op,0,bltzl_op,0,0,0), RS | BIMM }, 137 { insn_bne, M(bne_op,0,0,0,0,0), RS | RT | BIMM }, 138 { insn_daddiu, M(daddiu_op,0,0,0,0,0), RS | RT | SIMM }, 139 { insn_daddu, M(spec_op,0,0,0,0,daddu_op), RS | RT | RD }, 140 { insn_dmfc0, M(cop0_op,dmfc_op,0,0,0,0), RT | RD | SET}, 141 { insn_dmtc0, M(cop0_op,dmtc_op,0,0,0,0), RT | RD | SET}, 142 { insn_dsll, M(spec_op,0,0,0,0,dsll_op), RT | RD | RE }, 143 { insn_dsll32, M(spec_op,0,0,0,0,dsll32_op), RT | RD | RE }, 144 { insn_dsra, M(spec_op,0,0,0,0,dsra_op), RT | RD | RE }, 145 { insn_dsrl, M(spec_op,0,0,0,0,dsrl_op), RT | RD | RE }, 146 { insn_dsrl32, M(spec_op,0,0,0,0,dsrl32_op), RT | RD | RE }, 147 { insn_dsubu, M(spec_op,0,0,0,0,dsubu_op), RS | RT | RD }, 148 { insn_eret, M(cop0_op,cop_op,0,0,0,eret_op), 0 }, 149 { insn_j, M(j_op,0,0,0,0,0), JIMM }, 150 { insn_jal, M(jal_op,0,0,0,0,0), JIMM }, 151 { insn_jr, M(spec_op,0,0,0,0,jr_op), RS }, 152 { insn_ld, M(ld_op,0,0,0,0,0), RS | RT | SIMM }, 153 { insn_ll, M(ll_op,0,0,0,0,0), RS | RT | SIMM }, 154 { insn_lld, M(lld_op,0,0,0,0,0), RS | RT | SIMM }, 155 { insn_lui, M(lui_op,0,0,0,0,0), RT | SIMM }, 156 { insn_lw, M(lw_op,0,0,0,0,0), RS | RT | SIMM }, 157 { insn_mfc0, M(cop0_op,mfc_op,0,0,0,0), RT | RD | SET}, 158 { insn_mtc0, M(cop0_op,mtc_op,0,0,0,0), RT | RD | SET}, 159 { insn_ori, M(ori_op,0,0,0,0,0), RS | RT | UIMM }, 160 { insn_rfe, M(cop0_op,cop_op,0,0,0,rfe_op), 0 }, 161 { insn_sc, M(sc_op,0,0,0,0,0), RS | RT | SIMM }, 162 { insn_scd, M(scd_op,0,0,0,0,0), RS | RT | SIMM }, 163 { insn_sd, M(sd_op,0,0,0,0,0), RS | RT | SIMM }, 164 { insn_sll, M(spec_op,0,0,0,0,sll_op), RT | RD | RE }, 165 { insn_sra, M(spec_op,0,0,0,0,sra_op), RT | RD | RE }, 166 { insn_srl, M(spec_op,0,0,0,0,srl_op), RT | RD | RE }, 167 { insn_subu, M(spec_op,0,0,0,0,subu_op), RS | RT | RD }, 168 { insn_sw, M(sw_op,0,0,0,0,0), RS | RT | SIMM }, 169 { insn_tlbp, M(cop0_op,cop_op,0,0,0,tlbp_op), 0 }, 170 { insn_tlbwi, M(cop0_op,cop_op,0,0,0,tlbwi_op), 0 }, 171 { insn_tlbwr, M(cop0_op,cop_op,0,0,0,tlbwr_op), 0 }, 172 { insn_xor, M(spec_op,0,0,0,0,xor_op), RS | RT | RD }, 173 { insn_xori, M(xori_op,0,0,0,0,0), RS | RT | UIMM }, 174 { insn_invalid, 0, 0 } 175}; 176 177#undef M 178 179static __init u32 build_rs(u32 arg) 180{ 181 if (arg & ~RS_MASK) 182 printk(KERN_WARNING "TLB synthesizer field overflow\n"); 183 184 return (arg & RS_MASK) << RS_SH; 185} 186 187static __init u32 build_rt(u32 arg) 188{ 189 if (arg & ~RT_MASK) 190 printk(KERN_WARNING "TLB synthesizer field overflow\n"); 191 192 return (arg & RT_MASK) << RT_SH; 193} 194 195static __init u32 build_rd(u32 arg) 196{ 197 if (arg & ~RD_MASK) 198 printk(KERN_WARNING "TLB synthesizer field overflow\n"); 199 200 return (arg & RD_MASK) << RD_SH; 201} 202 203static __init u32 build_re(u32 arg) 204{ 205 if (arg & ~RE_MASK) 206 printk(KERN_WARNING "TLB synthesizer field overflow\n"); 207 208 return (arg & RE_MASK) << RE_SH; 209} 210 211static __init u32 build_simm(s32 arg) 212{ 213 if (arg > 0x7fff || arg < -0x8000) 214 printk(KERN_WARNING "TLB synthesizer field overflow\n"); 215 216 return arg & 0xffff; 217} 218 219static __init u32 build_uimm(u32 arg) 220{ 221 if (arg & ~IMM_MASK) 222 printk(KERN_WARNING "TLB synthesizer field overflow\n"); 223 224 return arg & IMM_MASK; 225} 226 227static __init u32 build_bimm(s32 arg) 228{ 229 if (arg > 0x1ffff || arg < -0x20000) 230 printk(KERN_WARNING "TLB synthesizer field overflow\n"); 231 232 if (arg & 0x3) 233 printk(KERN_WARNING "Invalid TLB synthesizer branch target\n"); 234 235 return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 2) & 0x7fff); 236} 237 238static __init u32 build_jimm(u32 arg) 239{ 240 if (arg & ~((JIMM_MASK) << 2)) 241 printk(KERN_WARNING "TLB synthesizer field overflow\n"); 242 243 return (arg >> 2) & JIMM_MASK; 244} 245 246static __init u32 build_func(u32 arg) 247{ 248 if (arg & ~FUNC_MASK) 249 printk(KERN_WARNING "TLB synthesizer field overflow\n"); 250 251 return arg & FUNC_MASK; 252} 253 254static __init u32 build_set(u32 arg) 255{ 256 if (arg & ~SET_MASK) 257 printk(KERN_WARNING "TLB synthesizer field overflow\n"); 258 259 return arg & SET_MASK; 260} 261 262/* 263 * The order of opcode arguments is implicitly left to right, 264 * starting with RS and ending with FUNC or IMM. 265 */ 266static void __init build_insn(u32 **buf, enum opcode opc, ...) 267{ 268 struct insn *ip = NULL; 269 unsigned int i; 270 va_list ap; 271 u32 op; 272 273 for (i = 0; insn_table[i].opcode != insn_invalid; i++) 274 if (insn_table[i].opcode == opc) { 275 ip = &insn_table[i]; 276 break; 277 } 278 279 if (!ip) 280 panic("Unsupported TLB synthesizer instruction %d", opc); 281 282 op = ip->match; 283 va_start(ap, opc); 284 if (ip->fields & RS) op |= build_rs(va_arg(ap, u32)); 285 if (ip->fields & RT) op |= build_rt(va_arg(ap, u32)); 286 if (ip->fields & RD) op |= build_rd(va_arg(ap, u32)); 287 if (ip->fields & RE) op |= build_re(va_arg(ap, u32)); 288 if (ip->fields & SIMM) op |= build_simm(va_arg(ap, s32)); 289 if (ip->fields & UIMM) op |= build_uimm(va_arg(ap, u32)); 290 if (ip->fields & BIMM) op |= build_bimm(va_arg(ap, s32)); 291 if (ip->fields & JIMM) op |= build_jimm(va_arg(ap, u32)); 292 if (ip->fields & FUNC) op |= build_func(va_arg(ap, u32)); 293 if (ip->fields & SET) op |= build_set(va_arg(ap, u32)); 294 va_end(ap); 295 296 **buf = op; 297 (*buf)++; 298} 299 300#define I_u1u2u3(op) \ 301 static inline void __init i##op(u32 **buf, unsigned int a, \ 302 unsigned int b, unsigned int c) \ 303 { \ 304 build_insn(buf, insn##op, a, b, c); \ 305 } 306 307#define I_u2u1u3(op) \ 308 static inline void __init i##op(u32 **buf, unsigned int a, \ 309 unsigned int b, unsigned int c) \ 310 { \ 311 build_insn(buf, insn##op, b, a, c); \ 312 } 313 314#define I_u3u1u2(op) \ 315 static inline void __init i##op(u32 **buf, unsigned int a, \ 316 unsigned int b, unsigned int c) \ 317 { \ 318 build_insn(buf, insn##op, b, c, a); \ 319 } 320 321#define I_u1u2s3(op) \ 322 static inline void __init i##op(u32 **buf, unsigned int a, \ 323 unsigned int b, signed int c) \ 324 { \ 325 build_insn(buf, insn##op, a, b, c); \ 326 } 327 328#define I_u2s3u1(op) \ 329 static inline void __init i##op(u32 **buf, unsigned int a, \ 330 signed int b, unsigned int c) \ 331 { \ 332 build_insn(buf, insn##op, c, a, b); \ 333 } 334 335#define I_u2u1s3(op) \ 336 static inline void __init i##op(u32 **buf, unsigned int a, \ 337 unsigned int b, signed int c) \ 338 { \ 339 build_insn(buf, insn##op, b, a, c); \ 340 } 341 342#define I_u1u2(op) \ 343 static inline void __init i##op(u32 **buf, unsigned int a, \ 344 unsigned int b) \ 345 { \ 346 build_insn(buf, insn##op, a, b); \ 347 } 348 349#define I_u1s2(op) \ 350 static inline void __init i##op(u32 **buf, unsigned int a, \ 351 signed int b) \ 352 { \ 353 build_insn(buf, insn##op, a, b); \ 354 } 355 356#define I_u1(op) \ 357 static inline void __init i##op(u32 **buf, unsigned int a) \ 358 { \ 359 build_insn(buf, insn##op, a); \ 360 } 361 362#define I_0(op) \ 363 static inline void __init i##op(u32 **buf) \ 364 { \ 365 build_insn(buf, insn##op); \ 366 } 367 368I_u2u1s3(_addiu); 369I_u3u1u2(_addu); 370I_u2u1u3(_andi); 371I_u3u1u2(_and); 372I_u1u2s3(_beq); 373I_u1u2s3(_beql); 374I_u1s2(_bgez); 375I_u1s2(_bgezl); 376I_u1s2(_bltz); 377I_u1s2(_bltzl); 378I_u1u2s3(_bne); 379I_u1u2u3(_dmfc0); 380I_u1u2u3(_dmtc0); 381I_u2u1s3(_daddiu); 382I_u3u1u2(_daddu); 383I_u2u1u3(_dsll); 384I_u2u1u3(_dsll32); 385I_u2u1u3(_dsra); 386I_u2u1u3(_dsrl); 387I_u2u1u3(_dsrl32); 388I_u3u1u2(_dsubu); 389I_0(_eret); 390I_u1(_j); 391I_u1(_jal); 392I_u1(_jr); 393I_u2s3u1(_ld); 394I_u2s3u1(_ll); 395I_u2s3u1(_lld); 396I_u1s2(_lui); 397I_u2s3u1(_lw); 398I_u1u2u3(_mfc0); 399I_u1u2u3(_mtc0); 400I_u2u1u3(_ori); 401I_0(_rfe); 402I_u2s3u1(_sc); 403I_u2s3u1(_scd); 404I_u2s3u1(_sd); 405I_u2u1u3(_sll); 406I_u2u1u3(_sra); 407I_u2u1u3(_srl); 408I_u3u1u2(_subu); 409I_u2s3u1(_sw); 410I_0(_tlbp); 411I_0(_tlbwi); 412I_0(_tlbwr); 413I_u3u1u2(_xor) 414I_u2u1u3(_xori); 415 416/* 417 * handling labels 418 */ 419 420enum label_id { 421 label_invalid, 422 label_second_part, 423 label_leave, 424#ifdef MODULE_START 425 label_module_alloc, 426#endif 427 label_vmalloc, 428 label_vmalloc_done, 429 label_tlbw_hazard, 430 label_split, 431 label_nopage_tlbl, 432 label_nopage_tlbs, 433 label_nopage_tlbm, 434 label_smp_pgtable_change, 435 label_r3000_write_probe_fail, 436}; 437 438struct label { 439 u32 *addr; 440 enum label_id lab; 441}; 442 443static __init void build_label(struct label **lab, u32 *addr, 444 enum label_id l) 445{ 446 (*lab)->addr = addr; 447 (*lab)->lab = l; 448 (*lab)++; 449} 450 451#define L_LA(lb) \ 452 static inline void l##lb(struct label **lab, u32 *addr) \ 453 { \ 454 build_label(lab, addr, label##lb); \ 455 } 456 457L_LA(_second_part) 458L_LA(_leave) 459#ifdef MODULE_START 460L_LA(_module_alloc) 461#endif 462L_LA(_vmalloc) 463L_LA(_vmalloc_done) 464L_LA(_tlbw_hazard) 465L_LA(_split) 466L_LA(_nopage_tlbl) 467L_LA(_nopage_tlbs) 468L_LA(_nopage_tlbm) 469L_LA(_smp_pgtable_change) 470L_LA(_r3000_write_probe_fail) 471 472/* convenience macros for instructions */ 473#ifdef CONFIG_64BIT 474# define i_LW(buf, rs, rt, off) i_ld(buf, rs, rt, off) 475# define i_SW(buf, rs, rt, off) i_sd(buf, rs, rt, off) 476# define i_SLL(buf, rs, rt, sh) i_dsll(buf, rs, rt, sh) 477# define i_SRA(buf, rs, rt, sh) i_dsra(buf, rs, rt, sh) 478# define i_SRL(buf, rs, rt, sh) i_dsrl(buf, rs, rt, sh) 479# define i_MFC0(buf, rt, rd...) i_dmfc0(buf, rt, rd) 480# define i_MTC0(buf, rt, rd...) i_dmtc0(buf, rt, rd) 481# define i_ADDIU(buf, rs, rt, val) i_daddiu(buf, rs, rt, val) 482# define i_ADDU(buf, rs, rt, rd) i_daddu(buf, rs, rt, rd) 483# define i_SUBU(buf, rs, rt, rd) i_dsubu(buf, rs, rt, rd) 484# define i_LL(buf, rs, rt, off) i_lld(buf, rs, rt, off) 485# define i_SC(buf, rs, rt, off) i_scd(buf, rs, rt, off) 486#else 487# define i_LW(buf, rs, rt, off) i_lw(buf, rs, rt, off) 488# define i_SW(buf, rs, rt, off) i_sw(buf, rs, rt, off) 489# define i_SLL(buf, rs, rt, sh) i_sll(buf, rs, rt, sh) 490# define i_SRA(buf, rs, rt, sh) i_sra(buf, rs, rt, sh) 491# define i_SRL(buf, rs, rt, sh) i_srl(buf, rs, rt, sh) 492# define i_MFC0(buf, rt, rd...) i_mfc0(buf, rt, rd) 493# define i_MTC0(buf, rt, rd...) i_mtc0(buf, rt, rd) 494# define i_ADDIU(buf, rs, rt, val) i_addiu(buf, rs, rt, val) 495# define i_ADDU(buf, rs, rt, rd) i_addu(buf, rs, rt, rd) 496# define i_SUBU(buf, rs, rt, rd) i_subu(buf, rs, rt, rd) 497# define i_LL(buf, rs, rt, off) i_ll(buf, rs, rt, off) 498# define i_SC(buf, rs, rt, off) i_sc(buf, rs, rt, off) 499#endif 500 501#define i_b(buf, off) i_beq(buf, 0, 0, off) 502#define i_beqz(buf, rs, off) i_beq(buf, rs, 0, off) 503#define i_beqzl(buf, rs, off) i_beql(buf, rs, 0, off) 504#define i_bnez(buf, rs, off) i_bne(buf, rs, 0, off) 505#define i_bnezl(buf, rs, off) i_bnel(buf, rs, 0, off) 506#define i_move(buf, a, b) i_ADDU(buf, a, 0, b) 507#define i_nop(buf) i_sll(buf, 0, 0, 0) 508#define i_ssnop(buf) i_sll(buf, 0, 0, 1) 509#define i_ehb(buf) i_sll(buf, 0, 0, 3) 510 511#ifdef CONFIG_64BIT 512static __init int __maybe_unused in_compat_space_p(long addr) 513{ 514 /* Is this address in 32bit compat space? */ 515 return (((addr) & 0xffffffff00000000L) == 0xffffffff00000000L); 516} 517 518static __init int __maybe_unused rel_highest(long val) 519{ 520 return ((((val + 0x800080008000L) >> 48) & 0xffff) ^ 0x8000) - 0x8000; 521} 522 523static __init int __maybe_unused rel_higher(long val) 524{ 525 return ((((val + 0x80008000L) >> 32) & 0xffff) ^ 0x8000) - 0x8000; 526} 527#endif 528 529static __init int rel_hi(long val) 530{ 531 return ((((val + 0x8000L) >> 16) & 0xffff) ^ 0x8000) - 0x8000; 532} 533 534static __init int rel_lo(long val) 535{ 536 return ((val & 0xffff) ^ 0x8000) - 0x8000; 537} 538 539static __init void i_LA_mostly(u32 **buf, unsigned int rs, long addr) 540{ 541#ifdef CONFIG_64BIT 542 if (!in_compat_space_p(addr)) { 543 i_lui(buf, rs, rel_highest(addr)); 544 if (rel_higher(addr)) 545 i_daddiu(buf, rs, rs, rel_higher(addr)); 546 if (rel_hi(addr)) { 547 i_dsll(buf, rs, rs, 16); 548 i_daddiu(buf, rs, rs, rel_hi(addr)); 549 i_dsll(buf, rs, rs, 16); 550 } else 551 i_dsll32(buf, rs, rs, 0); 552 } else 553#endif 554 i_lui(buf, rs, rel_hi(addr)); 555} 556 557static __init void __maybe_unused i_LA(u32 **buf, unsigned int rs, 558 long addr) 559{ 560 i_LA_mostly(buf, rs, addr); 561 if (rel_lo(addr)) 562 i_ADDIU(buf, rs, rs, rel_lo(addr)); 563} 564 565/* 566 * handle relocations 567 */ 568 569struct reloc { 570 u32 *addr; 571 unsigned int type; 572 enum label_id lab; 573}; 574 575static __init void r_mips_pc16(struct reloc **rel, u32 *addr, 576 enum label_id l) 577{ 578 (*rel)->addr = addr; 579 (*rel)->type = R_MIPS_PC16; 580 (*rel)->lab = l; 581 (*rel)++; 582} 583 584static inline void __resolve_relocs(struct reloc *rel, struct label *lab) 585{ 586 long laddr = (long)lab->addr; 587 long raddr = (long)rel->addr; 588 589 switch (rel->type) { 590 case R_MIPS_PC16: 591 *rel->addr |= build_bimm(laddr - (raddr + 4)); 592 break; 593 594 default: 595 panic("Unsupported TLB synthesizer relocation %d", 596 rel->type); 597 } 598} 599 600static __init void resolve_relocs(struct reloc *rel, struct label *lab) 601{ 602 struct label *l; 603 604 for (; rel->lab != label_invalid; rel++) 605 for (l = lab; l->lab != label_invalid; l++) 606 if (rel->lab == l->lab) 607 __resolve_relocs(rel, l); 608} 609 610static __init void move_relocs(struct reloc *rel, u32 *first, u32 *end, 611 long off) 612{ 613 for (; rel->lab != label_invalid; rel++) 614 if (rel->addr >= first && rel->addr < end) 615 rel->addr += off; 616} 617 618static __init void move_labels(struct label *lab, u32 *first, u32 *end, 619 long off) 620{ 621 for (; lab->lab != label_invalid; lab++) 622 if (lab->addr >= first && lab->addr < end) 623 lab->addr += off; 624} 625 626static __init void copy_handler(struct reloc *rel, struct label *lab, 627 u32 *first, u32 *end, u32 *target) 628{ 629 long off = (long)(target - first); 630 631 memcpy(target, first, (end - first) * sizeof(u32)); 632 633 move_relocs(rel, first, end, off); 634 move_labels(lab, first, end, off); 635} 636 637static __init int __maybe_unused insn_has_bdelay(struct reloc *rel, 638 u32 *addr) 639{ 640 for (; rel->lab != label_invalid; rel++) { 641 if (rel->addr == addr 642 && (rel->type == R_MIPS_PC16 643 || rel->type == R_MIPS_26)) 644 return 1; 645 } 646 647 return 0; 648} 649 650/* convenience functions for labeled branches */ 651static void __init __maybe_unused 652 il_bltz(u32 **p, struct reloc **r, unsigned int reg, enum label_id l) 653{ 654 r_mips_pc16(r, *p, l); 655 i_bltz(p, reg, 0); 656} 657 658static void __init __maybe_unused il_b(u32 **p, struct reloc **r, 659 enum label_id l) 660{ 661 r_mips_pc16(r, *p, l); 662 i_b(p, 0); 663} 664 665static void __init il_beqz(u32 **p, struct reloc **r, unsigned int reg, 666 enum label_id l) 667{ 668 r_mips_pc16(r, *p, l); 669 i_beqz(p, reg, 0); 670} 671 672static void __init __maybe_unused 673il_beqzl(u32 **p, struct reloc **r, unsigned int reg, enum label_id l) 674{ 675 r_mips_pc16(r, *p, l); 676 i_beqzl(p, reg, 0); 677} 678 679static void __init il_bnez(u32 **p, struct reloc **r, unsigned int reg, 680 enum label_id l) 681{ 682 r_mips_pc16(r, *p, l); 683 i_bnez(p, reg, 0); 684} 685 686static void __init il_bgezl(u32 **p, struct reloc **r, unsigned int reg, 687 enum label_id l) 688{ 689 r_mips_pc16(r, *p, l); 690 i_bgezl(p, reg, 0); 691} 692 693static void __init __maybe_unused 694il_bgez(u32 **p, struct reloc **r, unsigned int reg, enum label_id l) 695{ 696 r_mips_pc16(r, *p, l); 697 i_bgez(p, reg, 0); 698} 699 700/* The only general purpose registers allowed in TLB handlers. */ 701#define K0 26 702#define K1 27 703 704/* Some CP0 registers */ 705#define C0_INDEX 0, 0 706#define C0_ENTRYLO0 2, 0 707#define C0_TCBIND 2, 2 708#define C0_ENTRYLO1 3, 0 709#define C0_CONTEXT 4, 0 710#define C0_BADVADDR 8, 0 711#define C0_ENTRYHI 10, 0 712#define C0_EPC 14, 0 713#define C0_XCONTEXT 20, 0 714 715#ifdef CONFIG_64BIT 716# define GET_CONTEXT(buf, reg) i_MFC0(buf, reg, C0_XCONTEXT) 717#else 718# define GET_CONTEXT(buf, reg) i_MFC0(buf, reg, C0_CONTEXT) 719#endif 720 721/* The worst case length of the handler is around 18 instructions for 722 * R3000-style TLBs and up to 63 instructions for R4000-style TLBs. 723 * Maximum space available is 32 instructions for R3000 and 64 724 * instructions for R4000. 725 * 726 * We deliberately chose a buffer size of 128, so we won't scribble 727 * over anything important on overflow before we panic. 728 */ 729static __initdata u32 tlb_handler[128]; 730 731/* simply assume worst case size for labels and relocs */ 732static __initdata struct label labels[128]; 733static __initdata struct reloc relocs[128]; 734 735/* 736 * The R3000 TLB handler is simple. 737 */ 738static void __init build_r3000_tlb_refill_handler(void) 739{ 740 long pgdc = (long)pgd_current; 741 u32 *p; 742 int i; 743 744 memset(tlb_handler, 0, sizeof(tlb_handler)); 745 p = tlb_handler; 746 747 i_mfc0(&p, K0, C0_BADVADDR); 748 i_lui(&p, K1, rel_hi(pgdc)); /* cp0 delay */ 749 i_lw(&p, K1, rel_lo(pgdc), K1); 750 i_srl(&p, K0, K0, 22); /* load delay */ 751 i_sll(&p, K0, K0, 2); 752 i_addu(&p, K1, K1, K0); 753 i_mfc0(&p, K0, C0_CONTEXT); 754 i_lw(&p, K1, 0, K1); /* cp0 delay */ 755 i_andi(&p, K0, K0, 0xffc); /* load delay */ 756 i_addu(&p, K1, K1, K0); 757 i_lw(&p, K0, 0, K1); 758 i_nop(&p); /* load delay */ 759 i_mtc0(&p, K0, C0_ENTRYLO0); 760 i_mfc0(&p, K1, C0_EPC); /* cp0 delay */ 761 i_tlbwr(&p); /* cp0 delay */ 762 i_jr(&p, K1); 763 i_rfe(&p); /* branch delay */ 764 765 if (p > tlb_handler + 32) 766 panic("TLB refill handler space exceeded"); 767 768 pr_info("Synthesized TLB refill handler (%u instructions).\n", 769 (unsigned int)(p - tlb_handler)); 770 771 pr_debug("\t.set push\n"); 772 pr_debug("\t.set noreorder\n"); 773 for (i = 0; i < (p - tlb_handler); i++) 774 pr_debug("\t.word 0x%08x\n", tlb_handler[i]); 775 pr_debug("\t.set pop\n"); 776 777 memcpy((void *)ebase, tlb_handler, 0x80); 778} 779 780/* 781 * The R4000 TLB handler is much more complicated. We have two 782 * consecutive handler areas with 32 instructions space each. 783 * Since they aren't used at the same time, we can overflow in the 784 * other one.To keep things simple, we first assume linear space, 785 * then we relocate it to the final handler layout as needed. 786 */ 787static __initdata u32 final_handler[64]; 788 789static __init void __maybe_unused build_tlb_probe_entry(u32 **p) 790{ 791 switch (current_cpu_data.cputype) { 792 /* Found by experiment: R4600 v2.0 needs this, too. */ 793 case CPU_R4600: 794 case CPU_R5000: 795 case CPU_R5000A: 796 case CPU_NEVADA: 797 i_nop(p); 798 i_tlbp(p); 799 break; 800 801 default: 802 i_tlbp(p); 803 break; 804 } 805} 806 807/* 808 * Write random or indexed TLB entry, and care about the hazards from 809 * the preceeding mtc0 and for the following eret. 810 */ 811enum tlb_write_entry { tlb_random, tlb_indexed }; 812 813static __init void build_tlb_write_entry(u32 **p, struct label **l, 814 struct reloc **r, 815 enum tlb_write_entry wmode) 816{ 817 void(*tlbw)(u32 **) = NULL; 818 819 switch (wmode) { 820 case tlb_random: tlbw = i_tlbwr; break; 821 case tlb_indexed: tlbw = i_tlbwi; break; 822 } 823 824 switch (current_cpu_data.cputype) { 825 case CPU_R4000PC: 826 case CPU_R4000SC: 827 case CPU_R4000MC: 828 case CPU_R4400PC: 829 case CPU_R4400SC: 830 case CPU_R4400MC: 831 /* 832 * This branch uses up a mtc0 hazard nop slot and saves 833 * two nops after the tlbw instruction. 834 */ 835 il_bgezl(p, r, 0, label_tlbw_hazard); 836 tlbw(p); 837 l_tlbw_hazard(l, *p); 838 i_nop(p); 839 break; 840 841 case CPU_R4600: 842 case CPU_R4700: 843 case CPU_R5000: 844 case CPU_R5000A: 845 i_nop(p); 846 tlbw(p); 847 i_nop(p); 848 break; 849 850 case CPU_R4300: 851 case CPU_5KC: 852 case CPU_TX49XX: 853 case CPU_AU1000: 854 case CPU_AU1100: 855 case CPU_AU1500: 856 case CPU_AU1550: 857 case CPU_AU1200: 858 case CPU_PR4450: 859 i_nop(p); 860 tlbw(p); 861 break; 862 863 case CPU_R10000: 864 case CPU_R12000: 865 case CPU_R14000: 866 case CPU_4KC: 867 case CPU_SB1: 868 case CPU_SB1A: 869 case CPU_4KSC: 870 case CPU_20KC: 871 case CPU_25KF: 872 case CPU_BCM4710: 873 case CPU_BCM3302: 874 tlbw(p); 875 break; 876 877 case CPU_NEVADA: 878 i_nop(p); /* QED specifies 2 nops hazard */ 879 /* 880 * This branch uses up a mtc0 hazard nop slot and saves 881 * a nop after the tlbw instruction. 882 */ 883 il_bgezl(p, r, 0, label_tlbw_hazard); 884 tlbw(p); 885 l_tlbw_hazard(l, *p); 886 break; 887 888 case CPU_RM7000: 889 i_nop(p); 890 i_nop(p); 891 i_nop(p); 892 i_nop(p); 893 tlbw(p); 894 break; 895 896 case CPU_4KEC: 897 case CPU_24K: 898 case CPU_34K: 899 case CPU_74K: 900 i_ehb(p); 901 tlbw(p); 902 break; 903 904 case CPU_RM9000: 905 /* 906 * When the JTLB is updated by tlbwi or tlbwr, a subsequent 907 * use of the JTLB for instructions should not occur for 4 908 * cpu cycles and use for data translations should not occur 909 * for 3 cpu cycles. 910 */ 911 i_ssnop(p); 912 i_ssnop(p); 913 i_ssnop(p); 914 i_ssnop(p); 915 tlbw(p); 916 i_ssnop(p); 917 i_ssnop(p); 918 i_ssnop(p); 919 i_ssnop(p); 920 break; 921 922 case CPU_VR4111: 923 case CPU_VR4121: 924 case CPU_VR4122: 925 case CPU_VR4181: 926 case CPU_VR4181A: 927 i_nop(p); 928 i_nop(p); 929 tlbw(p); 930 i_nop(p); 931 i_nop(p); 932 break; 933 934 case CPU_VR4131: 935 case CPU_VR4133: 936 case CPU_R5432: 937 i_nop(p); 938 i_nop(p); 939 tlbw(p); 940 break; 941 942 default: 943 panic("No TLB refill handler yet (CPU type: %d)", 944 current_cpu_data.cputype); 945 break; 946 } 947} 948 949#ifdef CONFIG_64BIT 950/* 951 * TMP and PTR are scratch. 952 * TMP will be clobbered, PTR will hold the pmd entry. 953 */ 954static __init void 955build_get_pmde64(u32 **p, struct label **l, struct reloc **r, 956 unsigned int tmp, unsigned int ptr) 957{ 958 long pgdc = (long)pgd_current; 959 960 /* 961 * The vmalloc handling is not in the hotpath. 962 */ 963 i_dmfc0(p, tmp, C0_BADVADDR); 964#ifdef MODULE_START 965 il_bltz(p, r, tmp, label_module_alloc); 966#else 967 il_bltz(p, r, tmp, label_vmalloc); 968#endif 969 /* No i_nop needed here, since the next insn doesn't touch TMP. */ 970 971#ifdef CONFIG_SMP 972# ifdef CONFIG_MIPS_MT_SMTC 973 /* 974 * SMTC uses TCBind value as "CPU" index 975 */ 976 i_mfc0(p, ptr, C0_TCBIND); 977 i_dsrl(p, ptr, ptr, 19); 978# else 979 /* 980 * 64 bit SMP running in XKPHYS has smp_processor_id() << 3 981 * stored in CONTEXT. 982 */ 983 i_dmfc0(p, ptr, C0_CONTEXT); 984 i_dsrl(p, ptr, ptr, 23); 985#endif 986 i_LA_mostly(p, tmp, pgdc); 987 i_daddu(p, ptr, ptr, tmp); 988 i_dmfc0(p, tmp, C0_BADVADDR); 989 i_ld(p, ptr, rel_lo(pgdc), ptr); 990#else 991 i_LA_mostly(p, ptr, pgdc); 992 i_ld(p, ptr, rel_lo(pgdc), ptr); 993#endif 994 995 l_vmalloc_done(l, *p); 996 997 if (PGDIR_SHIFT - 3 < 32) /* get pgd offset in bytes */ 998 i_dsrl(p, tmp, tmp, PGDIR_SHIFT-3); 999 else 1000 i_dsrl32(p, tmp, tmp, PGDIR_SHIFT - 3 - 32); 1001 1002 i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3); 1003 i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */ 1004 i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */ 1005 i_ld(p, ptr, 0, ptr); /* get pmd pointer */ 1006 i_dsrl(p, tmp, tmp, PMD_SHIFT-3); /* get pmd offset in bytes */ 1007 i_andi(p, tmp, tmp, (PTRS_PER_PMD - 1)<<3); 1008 i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */ 1009} 1010 1011/* 1012 * BVADDR is the faulting address, PTR is scratch. 1013 * PTR will hold the pgd for vmalloc. 1014 */ 1015static __init void 1016build_get_pgd_vmalloc64(u32 **p, struct label **l, struct reloc **r, 1017 unsigned int bvaddr, unsigned int ptr) 1018{ 1019 long swpd = (long)swapper_pg_dir; 1020 1021#ifdef MODULE_START 1022 long modd = (long)module_pg_dir; 1023 1024 l_module_alloc(l, *p); 1025 /* 1026 * Assumption: 1027 * VMALLOC_START >= 0xc000000000000000UL 1028 * MODULE_START >= 0xe000000000000000UL 1029 */ 1030 i_SLL(p, ptr, bvaddr, 2); 1031 il_bgez(p, r, ptr, label_vmalloc); 1032 1033 if (in_compat_space_p(MODULE_START) && !rel_lo(MODULE_START)) { 1034 i_lui(p, ptr, rel_hi(MODULE_START)); /* delay slot */ 1035 } else { 1036 /* unlikely configuration */ 1037 i_nop(p); /* delay slot */ 1038 i_LA(p, ptr, MODULE_START); 1039 } 1040 i_dsubu(p, bvaddr, bvaddr, ptr); 1041 1042 if (in_compat_space_p(modd) && !rel_lo(modd)) { 1043 il_b(p, r, label_vmalloc_done); 1044 i_lui(p, ptr, rel_hi(modd)); 1045 } else { 1046 i_LA_mostly(p, ptr, modd); 1047 il_b(p, r, label_vmalloc_done); 1048 i_daddiu(p, ptr, ptr, rel_lo(modd)); 1049 } 1050 1051 l_vmalloc(l, *p); 1052 if (in_compat_space_p(MODULE_START) && !rel_lo(MODULE_START) && 1053 MODULE_START << 32 == VMALLOC_START) 1054 i_dsll32(p, ptr, ptr, 0); /* typical case */ 1055 else 1056 i_LA(p, ptr, VMALLOC_START); 1057#else 1058 l_vmalloc(l, *p); 1059 i_LA(p, ptr, VMALLOC_START); 1060#endif 1061 i_dsubu(p, bvaddr, bvaddr, ptr); 1062 1063 if (in_compat_space_p(swpd) && !rel_lo(swpd)) { 1064 il_b(p, r, label_vmalloc_done); 1065 i_lui(p, ptr, rel_hi(swpd)); 1066 } else { 1067 i_LA_mostly(p, ptr, swpd); 1068 il_b(p, r, label_vmalloc_done); 1069 i_daddiu(p, ptr, ptr, rel_lo(swpd)); 1070 } 1071} 1072 1073#else /* !CONFIG_64BIT */ 1074 1075/* 1076 * TMP and PTR are scratch. 1077 * TMP will be clobbered, PTR will hold the pgd entry. 1078 */ 1079static __init void __maybe_unused 1080build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr) 1081{ 1082 long pgdc = (long)pgd_current; 1083 1084 /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */ 1085#ifdef CONFIG_SMP 1086#ifdef CONFIG_MIPS_MT_SMTC 1087 /* 1088 * SMTC uses TCBind value as "CPU" index 1089 */ 1090 i_mfc0(p, ptr, C0_TCBIND); 1091 i_LA_mostly(p, tmp, pgdc); 1092 i_srl(p, ptr, ptr, 19); 1093#else 1094 /* 1095 * smp_processor_id() << 3 is stored in CONTEXT. 1096 */ 1097 i_mfc0(p, ptr, C0_CONTEXT); 1098 i_LA_mostly(p, tmp, pgdc); 1099 i_srl(p, ptr, ptr, 23); 1100#endif 1101 i_addu(p, ptr, tmp, ptr); 1102#else 1103 i_LA_mostly(p, ptr, pgdc); 1104#endif 1105 i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ 1106 i_lw(p, ptr, rel_lo(pgdc), ptr); 1107 i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */ 1108 i_sll(p, tmp, tmp, PGD_T_LOG2); 1109 i_addu(p, ptr, ptr, tmp); /* add in pgd offset */ 1110} 1111 1112#endif /* !CONFIG_64BIT */ 1113 1114static __init void build_adjust_context(u32 **p, unsigned int ctx) 1115{ 1116 unsigned int shift = 4 - (PTE_T_LOG2 + 1) + PAGE_SHIFT - 12; 1117 unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1); 1118 1119 switch (current_cpu_data.cputype) { 1120 case CPU_VR41XX: 1121 case CPU_VR4111: 1122 case CPU_VR4121: 1123 case CPU_VR4122: 1124 case CPU_VR4131: 1125 case CPU_VR4181: 1126 case CPU_VR4181A: 1127 case CPU_VR4133: 1128 shift += 2; 1129 break; 1130 1131 default: 1132 break; 1133 } 1134 1135 if (shift) 1136 i_SRL(p, ctx, ctx, shift); 1137 i_andi(p, ctx, ctx, mask); 1138} 1139 1140static __init void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr) 1141{ 1142 switch (current_cpu_data.cputype) { 1143 case CPU_NEVADA: 1144 i_LW(p, ptr, 0, ptr); 1145 GET_CONTEXT(p, tmp); /* get context reg */ 1146 break; 1147 1148 default: 1149 GET_CONTEXT(p, tmp); /* get context reg */ 1150 i_LW(p, ptr, 0, ptr); 1151 break; 1152 } 1153 1154 build_adjust_context(p, tmp); 1155 i_ADDU(p, ptr, ptr, tmp); /* add in offset */ 1156} 1157 1158static __init void build_update_entries(u32 **p, unsigned int tmp, 1159 unsigned int ptep) 1160{ 1161 /* 1162 * 64bit address support (36bit on a 32bit CPU) in a 32bit 1163 * Kernel is a special case. Only a few CPUs use it. 1164 */ 1165#ifdef CONFIG_64BIT_PHYS_ADDR 1166 if (cpu_has_64bits) { 1167 i_ld(p, tmp, 0, ptep); /* get even pte */ 1168 i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ 1169 i_dsrl(p, tmp, tmp, 6); /* convert to entrylo0 */ 1170 i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */ 1171 i_dsrl(p, ptep, ptep, 6); /* convert to entrylo1 */ 1172 i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */ 1173 } else { 1174 int pte_off_even = sizeof(pte_t) / 2; 1175 int pte_off_odd = pte_off_even + sizeof(pte_t); 1176 1177 /* The pte entries are pre-shifted */ 1178 i_lw(p, tmp, pte_off_even, ptep); /* get even pte */ 1179 i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */ 1180 i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */ 1181 i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */ 1182 } 1183#else 1184 i_LW(p, tmp, 0, ptep); /* get even pte */ 1185 i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ 1186 if (r45k_bvahwbug()) 1187 build_tlb_probe_entry(p); 1188 i_SRL(p, tmp, tmp, 6); /* convert to entrylo0 */ 1189 if (r4k_250MHZhwbug()) 1190 i_mtc0(p, 0, C0_ENTRYLO0); 1191 i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */ 1192 i_SRL(p, ptep, ptep, 6); /* convert to entrylo1 */ 1193 if (r45k_bvahwbug()) 1194 i_mfc0(p, tmp, C0_INDEX); 1195 if (r4k_250MHZhwbug()) 1196 i_mtc0(p, 0, C0_ENTRYLO1); 1197 i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */ 1198#endif 1199} 1200 1201static void __init build_r4000_tlb_refill_handler(void) 1202{ 1203 u32 *p = tlb_handler; 1204 struct label *l = labels; 1205 struct reloc *r = relocs; 1206 u32 *f; 1207 unsigned int final_len; 1208 int i; 1209 1210 memset(tlb_handler, 0, sizeof(tlb_handler)); 1211 memset(labels, 0, sizeof(labels)); 1212 memset(relocs, 0, sizeof(relocs)); 1213 memset(final_handler, 0, sizeof(final_handler)); 1214 1215 /* 1216 * create the plain linear handler 1217 */ 1218 if (bcm1250_m3_war()) { 1219 i_MFC0(&p, K0, C0_BADVADDR); 1220 i_MFC0(&p, K1, C0_ENTRYHI); 1221 i_xor(&p, K0, K0, K1); 1222 i_SRL(&p, K0, K0, PAGE_SHIFT + 1); 1223 il_bnez(&p, &r, K0, label_leave); 1224 /* No need for i_nop */ 1225 } 1226 1227#ifdef CONFIG_64BIT 1228 build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */ 1229#else 1230 build_get_pgde32(&p, K0, K1); /* get pgd in K1 */ 1231#endif 1232 1233 build_get_ptep(&p, K0, K1); 1234 build_update_entries(&p, K0, K1); 1235 build_tlb_write_entry(&p, &l, &r, tlb_random); 1236 l_leave(&l, p); 1237 i_eret(&p); /* return from trap */ 1238 1239#ifdef CONFIG_64BIT 1240 build_get_pgd_vmalloc64(&p, &l, &r, K0, K1); 1241#endif 1242 1243 /* 1244 * Overflow check: For the 64bit handler, we need at least one 1245 * free instruction slot for the wrap-around branch. In worst 1246 * case, if the intended insertion point is a delay slot, we 1247 * need three, with the second nop'ed and the third being 1248 * unused. 1249 */ 1250#ifdef CONFIG_32BIT 1251 if ((p - tlb_handler) > 64) 1252 panic("TLB refill handler space exceeded"); 1253#else 1254 if (((p - tlb_handler) > 63) 1255 || (((p - tlb_handler) > 61) 1256 && insn_has_bdelay(relocs, tlb_handler + 29))) 1257 panic("TLB refill handler space exceeded"); 1258#endif 1259 1260 /* 1261 * Now fold the handler in the TLB refill handler space. 1262 */ 1263#ifdef CONFIG_32BIT 1264 f = final_handler; 1265 /* Simplest case, just copy the handler. */ 1266 copy_handler(relocs, labels, tlb_handler, p, f); 1267 final_len = p - tlb_handler; 1268#else /* CONFIG_64BIT */ 1269 f = final_handler + 32; 1270 if ((p - tlb_handler) <= 32) { 1271 /* Just copy the handler. */ 1272 copy_handler(relocs, labels, tlb_handler, p, f); 1273 final_len = p - tlb_handler; 1274 } else { 1275 u32 *split = tlb_handler + 30; 1276 1277 /* 1278 * Find the split point. 1279 */ 1280 if (insn_has_bdelay(relocs, split - 1)) 1281 split--; 1282 1283 /* Copy first part of the handler. */ 1284 copy_handler(relocs, labels, tlb_handler, split, f); 1285 f += split - tlb_handler; 1286 1287 /* Insert branch. */ 1288 l_split(&l, final_handler); 1289 il_b(&f, &r, label_split); 1290 if (insn_has_bdelay(relocs, split)) 1291 i_nop(&f); 1292 else { 1293 copy_handler(relocs, labels, split, split + 1, f); 1294 move_labels(labels, f, f + 1, -1); 1295 f++; 1296 split++; 1297 } 1298 1299 /* Copy the rest of the handler. */ 1300 copy_handler(relocs, labels, split, p, final_handler); 1301 final_len = (f - (final_handler + 32)) + (p - split); 1302 } 1303#endif /* CONFIG_64BIT */ 1304 1305 resolve_relocs(relocs, labels); 1306 pr_info("Synthesized TLB refill handler (%u instructions).\n", 1307 final_len); 1308 1309 f = final_handler; 1310#ifdef CONFIG_64BIT 1311 if (final_len > 32) 1312 final_len = 64; 1313 else 1314 f = final_handler + 32; 1315#endif /* CONFIG_64BIT */ 1316 pr_debug("\t.set push\n"); 1317 pr_debug("\t.set noreorder\n"); 1318 for (i = 0; i < final_len; i++) 1319 pr_debug("\t.word 0x%08x\n", f[i]); 1320 pr_debug("\t.set pop\n"); 1321 1322 memcpy((void *)ebase, final_handler, 0x100); 1323} 1324 1325/* 1326 * TLB load/store/modify handlers. 1327 * 1328 * Only the fastpath gets synthesized at runtime, the slowpath for 1329 * do_page_fault remains normal asm. 1330 */ 1331extern void tlb_do_page_fault_0(void); 1332extern void tlb_do_page_fault_1(void); 1333 1334#define __tlb_handler_align \ 1335 __attribute__((__aligned__(1 << CONFIG_MIPS_L1_CACHE_SHIFT))) 1336 1337/* 1338 * 128 instructions for the fastpath handler is generous and should 1339 * never be exceeded. 1340 */ 1341#define FASTPATH_SIZE 128 1342 1343u32 __tlb_handler_align handle_tlbl[FASTPATH_SIZE]; 1344u32 __tlb_handler_align handle_tlbs[FASTPATH_SIZE]; 1345u32 __tlb_handler_align handle_tlbm[FASTPATH_SIZE]; 1346 1347static void __init 1348iPTE_LW(u32 **p, struct label **l, unsigned int pte, unsigned int ptr) 1349{ 1350#ifdef CONFIG_SMP 1351# ifdef CONFIG_64BIT_PHYS_ADDR 1352 if (cpu_has_64bits) 1353 i_lld(p, pte, 0, ptr); 1354 else 1355# endif 1356 i_LL(p, pte, 0, ptr); 1357#else 1358# ifdef CONFIG_64BIT_PHYS_ADDR 1359 if (cpu_has_64bits) 1360 i_ld(p, pte, 0, ptr); 1361 else 1362# endif 1363 i_LW(p, pte, 0, ptr); 1364#endif 1365} 1366 1367static void __init 1368iPTE_SW(u32 **p, struct reloc **r, unsigned int pte, unsigned int ptr, 1369 unsigned int mode) 1370{ 1371#ifdef CONFIG_64BIT_PHYS_ADDR 1372 unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY); 1373#endif 1374 1375 i_ori(p, pte, pte, mode); 1376#ifdef CONFIG_SMP 1377# ifdef CONFIG_64BIT_PHYS_ADDR 1378 if (cpu_has_64bits) 1379 i_scd(p, pte, 0, ptr); 1380 else 1381# endif 1382 i_SC(p, pte, 0, ptr); 1383 1384 if (r10000_llsc_war()) 1385 il_beqzl(p, r, pte, label_smp_pgtable_change); 1386 else 1387 il_beqz(p, r, pte, label_smp_pgtable_change); 1388 1389# ifdef CONFIG_64BIT_PHYS_ADDR 1390 if (!cpu_has_64bits) { 1391 /* no i_nop needed */ 1392 i_ll(p, pte, sizeof(pte_t) / 2, ptr); 1393 i_ori(p, pte, pte, hwmode); 1394 i_sc(p, pte, sizeof(pte_t) / 2, ptr); 1395 il_beqz(p, r, pte, label_smp_pgtable_change); 1396 /* no i_nop needed */ 1397 i_lw(p, pte, 0, ptr); 1398 } else 1399 i_nop(p); 1400# else 1401 i_nop(p); 1402# endif 1403#else 1404# ifdef CONFIG_64BIT_PHYS_ADDR 1405 if (cpu_has_64bits) 1406 i_sd(p, pte, 0, ptr); 1407 else 1408# endif 1409 i_SW(p, pte, 0, ptr); 1410 1411# ifdef CONFIG_64BIT_PHYS_ADDR 1412 if (!cpu_has_64bits) { 1413 i_lw(p, pte, sizeof(pte_t) / 2, ptr); 1414 i_ori(p, pte, pte, hwmode); 1415 i_sw(p, pte, sizeof(pte_t) / 2, ptr); 1416 i_lw(p, pte, 0, ptr); 1417 } 1418# endif 1419#endif 1420} 1421 1422/* 1423 * Check if PTE is present, if not then jump to LABEL. PTR points to 1424 * the page table where this PTE is located, PTE will be re-loaded 1425 * with it's original value. 1426 */ 1427static void __init 1428build_pte_present(u32 **p, struct label **l, struct reloc **r, 1429 unsigned int pte, unsigned int ptr, enum label_id lid) 1430{ 1431 i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_READ); 1432 i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_READ); 1433 il_bnez(p, r, pte, lid); 1434 iPTE_LW(p, l, pte, ptr); 1435} 1436 1437/* Make PTE valid, store result in PTR. */ 1438static void __init 1439build_make_valid(u32 **p, struct reloc **r, unsigned int pte, 1440 unsigned int ptr) 1441{ 1442 unsigned int mode = _PAGE_VALID | _PAGE_ACCESSED; 1443 1444 iPTE_SW(p, r, pte, ptr, mode); 1445} 1446 1447/* 1448 * Check if PTE can be written to, if not branch to LABEL. Regardless 1449 * restore PTE with value from PTR when done. 1450 */ 1451static void __init 1452build_pte_writable(u32 **p, struct label **l, struct reloc **r, 1453 unsigned int pte, unsigned int ptr, enum label_id lid) 1454{ 1455 i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE); 1456 i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE); 1457 il_bnez(p, r, pte, lid); 1458 iPTE_LW(p, l, pte, ptr); 1459} 1460 1461/* Make PTE writable, update software status bits as well, then store 1462 * at PTR. 1463 */ 1464static void __init 1465build_make_write(u32 **p, struct reloc **r, unsigned int pte, 1466 unsigned int ptr) 1467{ 1468 unsigned int mode = (_PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID 1469 | _PAGE_DIRTY); 1470 1471 iPTE_SW(p, r, pte, ptr, mode); 1472} 1473 1474/* 1475 * Check if PTE can be modified, if not branch to LABEL. Regardless 1476 * restore PTE with value from PTR when done. 1477 */ 1478static void __init 1479build_pte_modifiable(u32 **p, struct label **l, struct reloc **r, 1480 unsigned int pte, unsigned int ptr, enum label_id lid) 1481{ 1482 i_andi(p, pte, pte, _PAGE_WRITE); 1483 il_beqz(p, r, pte, lid); 1484 iPTE_LW(p, l, pte, ptr); 1485} 1486 1487/* 1488 * R3000 style TLB load/store/modify handlers. 1489 */ 1490 1491/* 1492 * This places the pte into ENTRYLO0 and writes it with tlbwi. 1493 * Then it returns. 1494 */ 1495static void __init 1496build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp) 1497{ 1498 i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ 1499 i_mfc0(p, tmp, C0_EPC); /* cp0 delay */ 1500 i_tlbwi(p); 1501 i_jr(p, tmp); 1502 i_rfe(p); /* branch delay */ 1503} 1504 1505/* 1506 * This places the pte into ENTRYLO0 and writes it with tlbwi 1507 * or tlbwr as appropriate. This is because the index register 1508 * may have the probe fail bit set as a result of a trap on a 1509 * kseg2 access, i.e. without refill. Then it returns. 1510 */ 1511static void __init 1512build_r3000_tlb_reload_write(u32 **p, struct label **l, struct reloc **r, 1513 unsigned int pte, unsigned int tmp) 1514{ 1515 i_mfc0(p, tmp, C0_INDEX); 1516 i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ 1517 il_bltz(p, r, tmp, label_r3000_write_probe_fail); /* cp0 delay */ 1518 i_mfc0(p, tmp, C0_EPC); /* branch delay */ 1519 i_tlbwi(p); /* cp0 delay */ 1520 i_jr(p, tmp); 1521 i_rfe(p); /* branch delay */ 1522 l_r3000_write_probe_fail(l, *p); 1523 i_tlbwr(p); /* cp0 delay */ 1524 i_jr(p, tmp); 1525 i_rfe(p); /* branch delay */ 1526} 1527 1528static void __init 1529build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte, 1530 unsigned int ptr) 1531{ 1532 long pgdc = (long)pgd_current; 1533 1534 i_mfc0(p, pte, C0_BADVADDR); 1535 i_lui(p, ptr, rel_hi(pgdc)); /* cp0 delay */ 1536 i_lw(p, ptr, rel_lo(pgdc), ptr); 1537 i_srl(p, pte, pte, 22); /* load delay */ 1538 i_sll(p, pte, pte, 2); 1539 i_addu(p, ptr, ptr, pte); 1540 i_mfc0(p, pte, C0_CONTEXT); 1541 i_lw(p, ptr, 0, ptr); /* cp0 delay */ 1542 i_andi(p, pte, pte, 0xffc); /* load delay */ 1543 i_addu(p, ptr, ptr, pte); 1544 i_lw(p, pte, 0, ptr); 1545 i_tlbp(p); /* load delay */ 1546} 1547 1548static void __init build_r3000_tlb_load_handler(void) 1549{ 1550 u32 *p = handle_tlbl; 1551 struct label *l = labels; 1552 struct reloc *r = relocs; 1553 int i; 1554 1555 memset(handle_tlbl, 0, sizeof(handle_tlbl)); 1556 memset(labels, 0, sizeof(labels)); 1557 memset(relocs, 0, sizeof(relocs)); 1558 1559 build_r3000_tlbchange_handler_head(&p, K0, K1); 1560 build_pte_present(&p, &l, &r, K0, K1, label_nopage_tlbl); 1561 i_nop(&p); /* load delay */ 1562 build_make_valid(&p, &r, K0, K1); 1563 build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); 1564 1565 l_nopage_tlbl(&l, p); 1566 i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); 1567 i_nop(&p); 1568 1569 if ((p - handle_tlbl) > FASTPATH_SIZE) 1570 panic("TLB load handler fastpath space exceeded"); 1571 1572 resolve_relocs(relocs, labels); 1573 pr_info("Synthesized TLB load handler fastpath (%u instructions).\n", 1574 (unsigned int)(p - handle_tlbl)); 1575 1576 pr_debug("\t.set push\n"); 1577 pr_debug("\t.set noreorder\n"); 1578 for (i = 0; i < (p - handle_tlbl); i++) 1579 pr_debug("\t.word 0x%08x\n", handle_tlbl[i]); 1580 pr_debug("\t.set pop\n"); 1581} 1582 1583static void __init build_r3000_tlb_store_handler(void) 1584{ 1585 u32 *p = handle_tlbs; 1586 struct label *l = labels; 1587 struct reloc *r = relocs; 1588 int i; 1589 1590 memset(handle_tlbs, 0, sizeof(handle_tlbs)); 1591 memset(labels, 0, sizeof(labels)); 1592 memset(relocs, 0, sizeof(relocs)); 1593 1594 build_r3000_tlbchange_handler_head(&p, K0, K1); 1595 build_pte_writable(&p, &l, &r, K0, K1, label_nopage_tlbs); 1596 i_nop(&p); /* load delay */ 1597 build_make_write(&p, &r, K0, K1); 1598 build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); 1599 1600 l_nopage_tlbs(&l, p); 1601 i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); 1602 i_nop(&p); 1603 1604 if ((p - handle_tlbs) > FASTPATH_SIZE) 1605 panic("TLB store handler fastpath space exceeded"); 1606 1607 resolve_relocs(relocs, labels); 1608 pr_info("Synthesized TLB store handler fastpath (%u instructions).\n", 1609 (unsigned int)(p - handle_tlbs)); 1610 1611 pr_debug("\t.set push\n"); 1612 pr_debug("\t.set noreorder\n"); 1613 for (i = 0; i < (p - handle_tlbs); i++) 1614 pr_debug("\t.word 0x%08x\n", handle_tlbs[i]); 1615 pr_debug("\t.set pop\n"); 1616} 1617 1618static void __init build_r3000_tlb_modify_handler(void) 1619{ 1620 u32 *p = handle_tlbm; 1621 struct label *l = labels; 1622 struct reloc *r = relocs; 1623 int i; 1624 1625 memset(handle_tlbm, 0, sizeof(handle_tlbm)); 1626 memset(labels, 0, sizeof(labels)); 1627 memset(relocs, 0, sizeof(relocs)); 1628 1629 build_r3000_tlbchange_handler_head(&p, K0, K1); 1630 build_pte_modifiable(&p, &l, &r, K0, K1, label_nopage_tlbm); 1631 i_nop(&p); /* load delay */ 1632 build_make_write(&p, &r, K0, K1); 1633 build_r3000_pte_reload_tlbwi(&p, K0, K1); 1634 1635 l_nopage_tlbm(&l, p); 1636 i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); 1637 i_nop(&p); 1638 1639 if ((p - handle_tlbm) > FASTPATH_SIZE) 1640 panic("TLB modify handler fastpath space exceeded"); 1641 1642 resolve_relocs(relocs, labels); 1643 pr_info("Synthesized TLB modify handler fastpath (%u instructions).\n", 1644 (unsigned int)(p - handle_tlbm)); 1645 1646 pr_debug("\t.set push\n"); 1647 pr_debug("\t.set noreorder\n"); 1648 for (i = 0; i < (p - handle_tlbm); i++) 1649 pr_debug("\t.word 0x%08x\n", handle_tlbm[i]); 1650 pr_debug("\t.set pop\n"); 1651} 1652 1653/* 1654 * R4000 style TLB load/store/modify handlers. 1655 */ 1656static void __init 1657build_r4000_tlbchange_handler_head(u32 **p, struct label **l, 1658 struct reloc **r, unsigned int pte, 1659 unsigned int ptr) 1660{ 1661#ifdef CONFIG_64BIT 1662 build_get_pmde64(p, l, r, pte, ptr); /* get pmd in ptr */ 1663#else 1664 build_get_pgde32(p, pte, ptr); /* get pgd in ptr */ 1665#endif 1666 1667 i_MFC0(p, pte, C0_BADVADDR); 1668 i_LW(p, ptr, 0, ptr); 1669 i_SRL(p, pte, pte, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2); 1670 i_andi(p, pte, pte, (PTRS_PER_PTE - 1) << PTE_T_LOG2); 1671 i_ADDU(p, ptr, ptr, pte); 1672 1673#ifdef CONFIG_SMP 1674 l_smp_pgtable_change(l, *p); 1675# endif 1676 iPTE_LW(p, l, pte, ptr); /* get even pte */ 1677 build_tlb_probe_entry(p); 1678} 1679 1680static void __init 1681build_r4000_tlbchange_handler_tail(u32 **p, struct label **l, 1682 struct reloc **r, unsigned int tmp, 1683 unsigned int ptr) 1684{ 1685 i_ori(p, ptr, ptr, sizeof(pte_t)); 1686 i_xori(p, ptr, ptr, sizeof(pte_t)); 1687 build_update_entries(p, tmp, ptr); 1688 build_tlb_write_entry(p, l, r, tlb_indexed); 1689 l_leave(l, *p); 1690 i_eret(p); /* return from trap */ 1691 1692#ifdef CONFIG_64BIT 1693 build_get_pgd_vmalloc64(p, l, r, tmp, ptr); 1694#endif 1695} 1696 1697static void __init build_r4000_tlb_load_handler(void) 1698{ 1699 u32 *p = handle_tlbl; 1700 struct label *l = labels; 1701 struct reloc *r = relocs; 1702 int i; 1703 1704 memset(handle_tlbl, 0, sizeof(handle_tlbl)); 1705 memset(labels, 0, sizeof(labels)); 1706 memset(relocs, 0, sizeof(relocs)); 1707 1708 if (bcm1250_m3_war()) { 1709 i_MFC0(&p, K0, C0_BADVADDR); 1710 i_MFC0(&p, K1, C0_ENTRYHI); 1711 i_xor(&p, K0, K0, K1); 1712 i_SRL(&p, K0, K0, PAGE_SHIFT + 1); 1713 il_bnez(&p, &r, K0, label_leave); 1714 /* No need for i_nop */ 1715 } 1716 1717 build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1); 1718 build_pte_present(&p, &l, &r, K0, K1, label_nopage_tlbl); 1719 build_make_valid(&p, &r, K0, K1); 1720 build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1); 1721 1722 l_nopage_tlbl(&l, p); 1723 i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); 1724 i_nop(&p); 1725 1726 if ((p - handle_tlbl) > FASTPATH_SIZE) 1727 panic("TLB load handler fastpath space exceeded"); 1728 1729 resolve_relocs(relocs, labels); 1730 pr_info("Synthesized TLB load handler fastpath (%u instructions).\n", 1731 (unsigned int)(p - handle_tlbl)); 1732 1733 pr_debug("\t.set push\n"); 1734 pr_debug("\t.set noreorder\n"); 1735 for (i = 0; i < (p - handle_tlbl); i++) 1736 pr_debug("\t.word 0x%08x\n", handle_tlbl[i]); 1737 pr_debug("\t.set pop\n"); 1738} 1739 1740static void __init build_r4000_tlb_store_handler(void) 1741{ 1742 u32 *p = handle_tlbs; 1743 struct label *l = labels; 1744 struct reloc *r = relocs; 1745 int i; 1746 1747 memset(handle_tlbs, 0, sizeof(handle_tlbs)); 1748 memset(labels, 0, sizeof(labels)); 1749 memset(relocs, 0, sizeof(relocs)); 1750 1751 build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1); 1752 build_pte_writable(&p, &l, &r, K0, K1, label_nopage_tlbs); 1753 build_make_write(&p, &r, K0, K1); 1754 build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1); 1755 1756 l_nopage_tlbs(&l, p); 1757 i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); 1758 i_nop(&p); 1759 1760 if ((p - handle_tlbs) > FASTPATH_SIZE) 1761 panic("TLB store handler fastpath space exceeded"); 1762 1763 resolve_relocs(relocs, labels); 1764 pr_info("Synthesized TLB store handler fastpath (%u instructions).\n", 1765 (unsigned int)(p - handle_tlbs)); 1766 1767 pr_debug("\t.set push\n"); 1768 pr_debug("\t.set noreorder\n"); 1769 for (i = 0; i < (p - handle_tlbs); i++) 1770 pr_debug("\t.word 0x%08x\n", handle_tlbs[i]); 1771 pr_debug("\t.set pop\n"); 1772} 1773 1774static void __init build_r4000_tlb_modify_handler(void) 1775{ 1776 u32 *p = handle_tlbm; 1777 struct label *l = labels; 1778 struct reloc *r = relocs; 1779 int i; 1780 1781 memset(handle_tlbm, 0, sizeof(handle_tlbm)); 1782 memset(labels, 0, sizeof(labels)); 1783 memset(relocs, 0, sizeof(relocs)); 1784 1785 build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1); 1786 build_pte_modifiable(&p, &l, &r, K0, K1, label_nopage_tlbm); 1787 /* Present and writable bits set, set accessed and dirty bits. */ 1788 build_make_write(&p, &r, K0, K1); 1789 build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1); 1790 1791 l_nopage_tlbm(&l, p); 1792 i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); 1793 i_nop(&p); 1794 1795 if ((p - handle_tlbm) > FASTPATH_SIZE) 1796 panic("TLB modify handler fastpath space exceeded"); 1797 1798 resolve_relocs(relocs, labels); 1799 pr_info("Synthesized TLB modify handler fastpath (%u instructions).\n", 1800 (unsigned int)(p - handle_tlbm)); 1801 1802 pr_debug("\t.set push\n"); 1803 pr_debug("\t.set noreorder\n"); 1804 for (i = 0; i < (p - handle_tlbm); i++) 1805 pr_debug("\t.word 0x%08x\n", handle_tlbm[i]); 1806 pr_debug("\t.set pop\n"); 1807} 1808 1809void __init build_tlb_refill_handler(void) 1810{ 1811 /* 1812 * The refill handler is generated per-CPU, multi-node systems 1813 * may have local storage for it. The other handlers are only 1814 * needed once. 1815 */ 1816 static int run_once = 0; 1817 1818 switch (current_cpu_data.cputype) { 1819 case CPU_R2000: 1820 case CPU_R3000: 1821 case CPU_R3000A: 1822 case CPU_R3081E: 1823 case CPU_TX3912: 1824 case CPU_TX3922: 1825 case CPU_TX3927: 1826 build_r3000_tlb_refill_handler(); 1827 if (!run_once) { 1828 build_r3000_tlb_load_handler(); 1829 build_r3000_tlb_store_handler(); 1830 build_r3000_tlb_modify_handler(); 1831 run_once++; 1832 } 1833 break; 1834 1835 case CPU_R6000: 1836 case CPU_R6000A: 1837 panic("No R6000 TLB refill handler yet"); 1838 break; 1839 1840 case CPU_R8000: 1841 panic("No R8000 TLB refill handler yet"); 1842 break; 1843 1844 default: 1845 build_r4000_tlb_refill_handler(); 1846 if (!run_once) { 1847 build_r4000_tlb_load_handler(); 1848 build_r4000_tlb_store_handler(); 1849 build_r4000_tlb_modify_handler(); 1850 run_once++; 1851 } 1852 } 1853} 1854 1855void __init flush_tlb_handlers(void) 1856{ 1857 flush_icache_range((unsigned long)handle_tlbl, 1858 (unsigned long)handle_tlbl + sizeof(handle_tlbl)); 1859 flush_icache_range((unsigned long)handle_tlbs, 1860 (unsigned long)handle_tlbs + sizeof(handle_tlbs)); 1861 flush_icache_range((unsigned long)handle_tlbm, 1862 (unsigned long)handle_tlbm + sizeof(handle_tlbm)); 1863} 1864