/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 *
 * A code-rewriter that enables instruction single-stepping.
 * Derived from iLib's single-stepping code.
 */

#ifndef __tilegx__   /* No support for single-step yet. */

/* These functions are only used on the TILE platform */
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/uaccess.h>
#include <linux/mman.h>
#include <linux/types.h>
#include <linux/err.h>
#include <asm/cacheflush.h>
#include <asm/opcode-tile.h>
#include <asm/opcode_constants.h>
#include <arch/abi.h>

#define signExtend17(val) sign_extend((val), 17)
#define TILE_X1_MASK (0xffffffffULL << 31)

int unaligned_printk;

static int __init setup_unaligned_printk(char *str)
{
	long val;
	if (strict_strtol(str, 0, &val) != 0)
		return 0;
	unaligned_printk = val;
	pr_info("Printk for each unaligned data access is %s\n",
		unaligned_printk ? "enabled" : "disabled");
	return 1;
}
__setup("unaligned_printk=", setup_unaligned_printk);

unsigned int unaligned_fixup_count;

enum mem_op {
	MEMOP_NONE,
	MEMOP_LOAD,
	MEMOP_STORE,
	MEMOP_LOAD_POSTINCR,
	MEMOP_STORE_POSTINCR
};

static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, int32_t offset)
{
	tile_bundle_bits result;

	/* mask out the old offset */
	tile_bundle_bits mask = create_BrOff_X1(-1);
	result = n & (~mask);

	/* or in the new offset */
	result |= create_BrOff_X1(offset);

	return result;
}

static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src)
{
	tile_bundle_bits result;
	tile_bundle_bits op;

	result = n & (~TILE_X1_MASK);

	op = create_Opcode_X1(SPECIAL_0_OPCODE_X1) |
		create_RRROpcodeExtension_X1(OR_SPECIAL_0_OPCODE_X1) |
		create_Dest_X1(dest) |
		create_SrcB_X1(TREG_ZERO) |
		create_SrcA_X1(src);

	result |= op;
	return result;
}

static inline tile_bundle_bits nop_X1(tile_bundle_bits n)
{
	return move_X1(n, TREG_ZERO, TREG_ZERO);
}

static inline tile_bundle_bits addi_X1(
	tile_bundle_bits n, int dest, int src, int imm)
{
	n &= ~TILE_X1_MASK;

	n |= (create_SrcA_X1(src) |
		create_Dest_X1(dest) |
		create_Imm8_X1(imm) |
		create_S_X1(0) |
		create_Opcode_X1(IMM_0_OPCODE_X1) |
		create_ImmOpcodeExtension_X1(ADDI_IMM_0_OPCODE_X1));

	return n;
}

static tile_bundle_bits rewrite_load_store_unaligned(
	struct single_step_state *state,
	tile_bundle_bits bundle,
	struct pt_regs *regs,
	enum mem_op mem_op,
	int size, int sign_ext)
{
	unsigned char __user *addr;
	int val_reg, addr_reg, err, val;

	/* Get address and value registers */
	if (bundle & TILE_BUNDLE_Y_ENCODING_MASK) {
		addr_reg = get_SrcA_Y2(bundle);
		val_reg = get_SrcBDest_Y2(bundle);
	} else if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) {
		addr_reg = get_SrcA_X1(bundle);
		val_reg = get_Dest_X1(bundle);
	} else {
		addr_reg = get_SrcA_X1(bundle);
		val_reg = get_SrcB_X1(bundle);
	}
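
	/*
	 * Note the field asymmetry above: X1-format loads name the value
	 * register in Dest while X1-format stores name it in SrcB;
	 * Y2-format memory ops use the shared SrcBDest field in either
	 * direction.
	 */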

	if ((val_reg >= PTREGS_NR_GPRS &&
	     (val_reg != TREG_ZERO ||
	      mem_op == MEMOP_LOAD ||
	      mem_op == MEMOP_LOAD_POSTINCR)) ||
	    addr_reg >= PTREGS_NR_GPRS)
		return bundle;

	/* If it's aligned, don't handle it specially */
	addr = (void __user *)regs->regs[addr_reg];
	if (((unsigned long)addr % size) == 0)
		return bundle;

#ifndef __LITTLE_ENDIAN
# error We assume little-endian representation with copy_xx_user size 2 here
#endif
	/* Handle unaligned load/store */
	if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) {
		unsigned short val_16;
		switch (size) {
		case 2:
			err = copy_from_user(&val_16, addr, sizeof(val_16));
			val = sign_ext ? ((short)val_16) : val_16;
			break;
		case 4:
			err = copy_from_user(&val, addr, sizeof(val));
			break;
		default:
			BUG();
		}
		if (err == 0) {
			state->update_reg = val_reg;
			state->update_value = val;
			state->update = 1;
		}
	} else {
		val = (val_reg == TREG_ZERO) ? 0 : regs->regs[val_reg];
		err = copy_to_user(addr, &val, size);
	}

	if (err) {
		siginfo_t info = {
			.si_signo = SIGSEGV,
			.si_code = SEGV_MAPERR,
			.si_addr = addr
		};
		force_sig_info(info.si_signo, &info, current);
		return (tile_bundle_bits) 0;
	}

	if (unaligned_fixup == 0) {
		siginfo_t info = {
			.si_signo = SIGBUS,
			.si_code = BUS_ADRALN,
			.si_addr = addr
		};
		force_sig_info(info.si_signo, &info, current);
		return (tile_bundle_bits) 0;
	}

	if (unaligned_printk || unaligned_fixup_count == 0) {
		pr_info("Process %d/%s: PC %#lx: Fixup of"
			" unaligned %s at %#lx.\n",
			current->pid, current->comm, regs->pc,
			(mem_op == MEMOP_LOAD ||
			 mem_op == MEMOP_LOAD_POSTINCR) ?
			"load" : "store",
			(unsigned long)addr);
		if (!unaligned_printk) {
#define P pr_info
P("\n");
P("Unaligned fixups in the kernel will slow your application considerably.\n");
P("To find them, write a \"1\" to /proc/sys/tile/unaligned_fixup/printk,\n");
P("which requests the kernel show all unaligned fixups, or write a \"0\"\n");
P("to /proc/sys/tile/unaligned_fixup/enabled, in which case each unaligned\n");
P("access will become a SIGBUS you can debug. No further warnings will be\n");
P("shown so as to avoid additional slowdown, but you can track the number\n");
P("of fixups performed via /proc/sys/tile/unaligned_fixup/count.\n");
P("Use the tile-addr2line command (see \"info addr2line\") to decode PCs.\n");
P("\n");
#undef P
		}
	}
	++unaligned_fixup_count;
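
	/*
	 * The access itself has been emulated above, so now rewrite the
	 * bundle to make the memory operation harmless while keeping the
	 * rest of its work: a Y2 op becomes a prefetch (load to the zero
	 * register), post-increments keep their address update via addi,
	 * and a plain X1 load/store becomes a nop.
	 */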
	if (bundle & TILE_BUNDLE_Y_ENCODING_MASK) {
		/* Convert the Y2 instruction to a prefetch. */
		bundle &= ~(create_SrcBDest_Y2(-1) |
			    create_Opcode_Y2(-1));
		bundle |= (create_SrcBDest_Y2(TREG_ZERO) |
			   create_Opcode_Y2(LW_OPCODE_Y2));
	/* Replace the load postincr with an addi */
	} else if (mem_op == MEMOP_LOAD_POSTINCR) {
		bundle = addi_X1(bundle, addr_reg, addr_reg,
				 get_Imm8_X1(bundle));
	/* Replace the store postincr with an addi */
	} else if (mem_op == MEMOP_STORE_POSTINCR) {
		bundle = addi_X1(bundle, addr_reg, addr_reg,
				 get_Dest_Imm8_X1(bundle));
	} else {
		/* Convert the X1 instruction to a nop. */
		bundle &= ~(create_Opcode_X1(-1) |
			    create_UnShOpcodeExtension_X1(-1) |
			    create_UnOpcodeExtension_X1(-1));
		bundle |= (create_Opcode_X1(SHUN_0_OPCODE_X1) |
			   create_UnShOpcodeExtension_X1(
				   UN_0_SHUN_0_OPCODE_X1) |
			   create_UnOpcodeExtension_X1(
				   NOP_UN_0_SHUN_0_OPCODE_X1));
	}

	return bundle;
}

/**
 * single_step_once() - entry point when single stepping has been triggered.
 * @regs: The machine register state
 *
 * When we arrive at this routine via a trampoline, the single step
 * engine copies the executing bundle to the single step buffer.
 * If the instruction is a conditional branch, then the target is
 * reset to one past the next instruction.  If the instruction
 * sets the lr, then that is noted.  If the instruction is a jump
 * or call, then the new target pc is preserved and the current
 * bundle instruction set to null.
 *
 * The necessary post-single-step rewriting information is stored in
 * single_step_state.  We use data segment values because the
 * stack will be rewound when we run the rewritten single-stepped
 * instruction.
 */
void single_step_once(struct pt_regs *regs)
{
	extern tile_bundle_bits __single_step_ill_insn;
	extern tile_bundle_bits __single_step_j_insn;
	extern tile_bundle_bits __single_step_addli_insn;
	extern tile_bundle_bits __single_step_auli_insn;
	struct thread_info *info = (void *)current_thread_info();
	struct single_step_state *state = info->step_state;
	int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP);
	tile_bundle_bits __user *buffer, *pc;
	tile_bundle_bits bundle;
	int temp_reg;
	int target_reg = TREG_LR;
	int err;
	enum mem_op mem_op = MEMOP_NONE;
	int size = 0, sign_ext = 0;	/* happy compiler */

	asm(
"	.pushsection .rodata.single_step\n"
"	.align 8\n"
"	.globl __single_step_ill_insn\n"
"__single_step_ill_insn:\n"
"	ill\n"
"	.globl __single_step_addli_insn\n"
"__single_step_addli_insn:\n"
"	{ nop; addli r0, zero, 0 }\n"
"	.globl __single_step_auli_insn\n"
"__single_step_auli_insn:\n"
"	{ nop; auli r0, r0, 0 }\n"
"	.globl __single_step_j_insn\n"
"__single_step_j_insn:\n"
"	j .\n"
"	.popsection\n"
	);

	if (state == NULL) {
		/* allocate the per-thread single-step state record */
		state = kmalloc(sizeof(struct single_step_state), GFP_KERNEL);
		if (state == NULL) {
			pr_err("Out of kernel memory trying to single-step\n");
			return;
		}

		/* allocate a cache line of writable, executable memory */
		down_write(&current->mm->mmap_sem);
		buffer = (void __user *) do_mmap(NULL, 0, 64,
					PROT_EXEC | PROT_READ | PROT_WRITE,
					MAP_PRIVATE | MAP_ANONYMOUS,
					0);
		up_write(&current->mm->mmap_sem);

		if (IS_ERR((void __force *)buffer)) {
			kfree(state);
			pr_err("Out of kernel pages trying to single-step\n");
			return;
		}

		state->buffer = buffer;
		state->is_enabled = 0;

		info->step_state = state;

		/* Validate our stored instruction patterns */
		BUG_ON(get_Opcode_X1(__single_step_addli_insn) !=
		       ADDLI_OPCODE_X1);
		BUG_ON(get_Opcode_X1(__single_step_auli_insn) !=
		       AULI_OPCODE_X1);
		BUG_ON(get_SrcA_X1(__single_step_addli_insn) != TREG_ZERO);
		BUG_ON(get_Dest_X1(__single_step_addli_insn) != 0);
		BUG_ON(get_JOffLong_X1(__single_step_j_insn) != 0);
	}
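
	/*
	 * Note: the 64-byte mapping above always suffices, since the
	 * longest sequence written below is four bundles (the rewritten
	 * instruction, an addli/auli pair, and the jump back), or
	 * 32 bytes.
	 */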

	/*
	 * If we are returning from a syscall, we still haven't hit the
	 * "ill" for the swint1 instruction.  So back the PC up to be
	 * pointing at the swint1, but we'll actually return directly
	 * back to the "ill" so we come back in via SIGILL as if we
	 * had "executed" the swint1 without ever being in kernel space.
	 */
	if (regs->faultnum == INT_SWINT_1)
		regs->pc -= 8;

	pc = (tile_bundle_bits __user *)(regs->pc);
	if (get_user(bundle, pc) != 0) {
		pr_err("Couldn't read instruction at %p trying to step\n", pc);
		return;
	}

	/* We'll follow the instruction with 2 ill op bundles */
	state->orig_pc = (unsigned long)pc;
	state->next_pc = (unsigned long)(pc + 1);
	state->branch_next_pc = 0;
	state->update = 0;

	if (!(bundle & TILE_BUNDLE_Y_ENCODING_MASK)) {
		/* two wide, check for control flow */
		int opcode = get_Opcode_X1(bundle);

		switch (opcode) {
		/* branches */
		case BRANCH_OPCODE_X1:
		{
			int32_t offset = signExtend17(get_BrOff_X1(bundle));

			/*
			 * For branches, we use a rewriting trick to let the
			 * hardware evaluate whether the branch is taken or
			 * untaken.  We record the target offset and then
			 * rewrite the branch instruction to target 1 insn
			 * ahead if the branch is taken.  We then follow the
			 * rewritten branch with two bundles, each containing
			 * an "ill" instruction.  The supervisor examines the
			 * pc after the single step code is executed, and if
			 * the pc is the first ill instruction, then the
			 * branch (if any) was not taken.  If the pc is the
			 * second ill instruction, then the branch was
			 * taken.  The new pc is computed for these cases, and
			 * inserted into the registers for the thread.  If
			 * the pc is the start of the single step code, then
			 * an exception or interrupt was taken before the
			 * code started processing, and the same "original"
			 * pc is restored.  This change, different from the
			 * original implementation, has the advantage of
			 * executing a single user instruction.
			 */
			state->branch_next_pc = (unsigned long)(pc + offset);

			/* rewrite branch offset to go forward one bundle */
			bundle = set_BrOff_X1(bundle, 2);
		}
		break;
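
		/*
		 * Concretely: in the step buffer the rewritten branch sits
		 * at bundle 0, so when taken it lands on the second "ill"
		 * (bundle 2), and when untaken it falls through to the
		 * first "ill" (bundle 1), letting the supervisor tell the
		 * two outcomes apart by pc alone.
		 */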
386 */ 387 state->branch_next_pc = (unsigned long)(pc + offset); 388 389 /* rewrite branch offset to go forward one bundle */ 390 bundle = set_BrOff_X1(bundle, 2); 391 } 392 break; 393 394 /* jumps */ 395 case JALB_OPCODE_X1: 396 case JALF_OPCODE_X1: 397 state->update = 1; 398 state->next_pc = 399 (unsigned long) (pc + get_JOffLong_X1(bundle)); 400 break; 401 402 case JB_OPCODE_X1: 403 case JF_OPCODE_X1: 404 state->next_pc = 405 (unsigned long) (pc + get_JOffLong_X1(bundle)); 406 bundle = nop_X1(bundle); 407 break; 408 409 case SPECIAL_0_OPCODE_X1: 410 switch (get_RRROpcodeExtension_X1(bundle)) { 411 /* jump-register */ 412 case JALRP_SPECIAL_0_OPCODE_X1: 413 case JALR_SPECIAL_0_OPCODE_X1: 414 state->update = 1; 415 state->next_pc = 416 regs->regs[get_SrcA_X1(bundle)]; 417 break; 418 419 case JRP_SPECIAL_0_OPCODE_X1: 420 case JR_SPECIAL_0_OPCODE_X1: 421 state->next_pc = 422 regs->regs[get_SrcA_X1(bundle)]; 423 bundle = nop_X1(bundle); 424 break; 425 426 case LNK_SPECIAL_0_OPCODE_X1: 427 state->update = 1; 428 target_reg = get_Dest_X1(bundle); 429 break; 430 431 /* stores */ 432 case SH_SPECIAL_0_OPCODE_X1: 433 mem_op = MEMOP_STORE; 434 size = 2; 435 break; 436 437 case SW_SPECIAL_0_OPCODE_X1: 438 mem_op = MEMOP_STORE; 439 size = 4; 440 break; 441 } 442 break; 443 444 /* loads and iret */ 445 case SHUN_0_OPCODE_X1: 446 if (get_UnShOpcodeExtension_X1(bundle) == 447 UN_0_SHUN_0_OPCODE_X1) { 448 switch (get_UnOpcodeExtension_X1(bundle)) { 449 case LH_UN_0_SHUN_0_OPCODE_X1: 450 mem_op = MEMOP_LOAD; 451 size = 2; 452 sign_ext = 1; 453 break; 454 455 case LH_U_UN_0_SHUN_0_OPCODE_X1: 456 mem_op = MEMOP_LOAD; 457 size = 2; 458 sign_ext = 0; 459 break; 460 461 case LW_UN_0_SHUN_0_OPCODE_X1: 462 mem_op = MEMOP_LOAD; 463 size = 4; 464 break; 465 466 case IRET_UN_0_SHUN_0_OPCODE_X1: 467 { 468 unsigned long ex0_0 = __insn_mfspr( 469 SPR_EX_CONTEXT_0_0); 470 unsigned long ex0_1 = __insn_mfspr( 471 SPR_EX_CONTEXT_0_1); 472 /* 473 * Special-case it if we're iret'ing 474 * to PL0 again. Otherwise just let 475 * it run and it will generate SIGILL. 476 */ 477 if (EX1_PL(ex0_1) == USER_PL) { 478 state->next_pc = ex0_0; 479 regs->ex1 = ex0_1; 480 bundle = nop_X1(bundle); 481 } 482 } 483 } 484 } 485 break; 486 487#if CHIP_HAS_WH64() 488 /* postincrement operations */ 489 case IMM_0_OPCODE_X1: 490 switch (get_ImmOpcodeExtension_X1(bundle)) { 491 case LWADD_IMM_0_OPCODE_X1: 492 mem_op = MEMOP_LOAD_POSTINCR; 493 size = 4; 494 break; 495 496 case LHADD_IMM_0_OPCODE_X1: 497 mem_op = MEMOP_LOAD_POSTINCR; 498 size = 2; 499 sign_ext = 1; 500 break; 501 502 case LHADD_U_IMM_0_OPCODE_X1: 503 mem_op = MEMOP_LOAD_POSTINCR; 504 size = 2; 505 sign_ext = 0; 506 break; 507 508 case SWADD_IMM_0_OPCODE_X1: 509 mem_op = MEMOP_STORE_POSTINCR; 510 size = 4; 511 break; 512 513 case SHADD_IMM_0_OPCODE_X1: 514 mem_op = MEMOP_STORE_POSTINCR; 515 size = 2; 516 break; 517 518 default: 519 break; 520 } 521 break; 522#endif /* CHIP_HAS_WH64() */ 523 } 524 525 if (state->update) { 526 /* 527 * Get an available register. We start with a 528 * bitmask with 1's for available registers. 529 * We truncate to the low 32 registers since 530 * we are guaranteed to have set bits in the 531 * low 32 bits, then use ctz to pick the first. 
532 */ 533 u32 mask = (u32) ~((1ULL << get_Dest_X0(bundle)) | 534 (1ULL << get_SrcA_X0(bundle)) | 535 (1ULL << get_SrcB_X0(bundle)) | 536 (1ULL << target_reg)); 537 temp_reg = __builtin_ctz(mask); 538 state->update_reg = temp_reg; 539 state->update_value = regs->regs[temp_reg]; 540 regs->regs[temp_reg] = (unsigned long) (pc+1); 541 regs->flags |= PT_FLAGS_RESTORE_REGS; 542 bundle = move_X1(bundle, target_reg, temp_reg); 543 } 544 } else { 545 int opcode = get_Opcode_Y2(bundle); 546 547 switch (opcode) { 548 /* loads */ 549 case LH_OPCODE_Y2: 550 mem_op = MEMOP_LOAD; 551 size = 2; 552 sign_ext = 1; 553 break; 554 555 case LH_U_OPCODE_Y2: 556 mem_op = MEMOP_LOAD; 557 size = 2; 558 sign_ext = 0; 559 break; 560 561 case LW_OPCODE_Y2: 562 mem_op = MEMOP_LOAD; 563 size = 4; 564 break; 565 566 /* stores */ 567 case SH_OPCODE_Y2: 568 mem_op = MEMOP_STORE; 569 size = 2; 570 break; 571 572 case SW_OPCODE_Y2: 573 mem_op = MEMOP_STORE; 574 size = 4; 575 break; 576 } 577 } 578 579 /* 580 * Check if we need to rewrite an unaligned load/store. 581 * Returning zero is a special value meaning we need to SIGSEGV. 582 */ 583 if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) { 584 bundle = rewrite_load_store_unaligned(state, bundle, regs, 585 mem_op, size, sign_ext); 586 if (bundle == 0) 587 return; 588 } 589 590 /* write the bundle to our execution area */ 591 buffer = state->buffer; 592 err = __put_user(bundle, buffer++); 593 594 /* 595 * If we're really single-stepping, we take an INT_ILL after. 596 * If we're just handling an unaligned access, we can just 597 * jump directly back to where we were in user code. 598 */ 599 if (is_single_step) { 600 err |= __put_user(__single_step_ill_insn, buffer++); 601 err |= __put_user(__single_step_ill_insn, buffer++); 602 } else { 603 long delta; 604 605 if (state->update) { 606 /* We have some state to update; do it inline */ 607 int ha16; 608 bundle = __single_step_addli_insn; 609 bundle |= create_Dest_X1(state->update_reg); 610 bundle |= create_Imm16_X1(state->update_value); 611 err |= __put_user(bundle, buffer++); 612 bundle = __single_step_auli_insn; 613 bundle |= create_Dest_X1(state->update_reg); 614 bundle |= create_SrcA_X1(state->update_reg); 615 ha16 = (state->update_value + 0x8000) >> 16; 616 bundle |= create_Imm16_X1(ha16); 617 err |= __put_user(bundle, buffer++); 618 state->update = 0; 619 } 620 621 /* End with a jump back to the next instruction */ 622 delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) - 623 (unsigned long)buffer) >> 624 TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES; 625 bundle = __single_step_j_insn; 626 bundle |= create_JOffLong_X1(delta); 627 err |= __put_user(bundle, buffer++); 628 } 629 630 if (err) { 631 pr_err("Fault when writing to single-step buffer\n"); 632 return; 633 } 634 635 /* 636 * Flush the buffer. 637 * We do a local flush only, since this is a thread-specific buffer. 638 */ 639 __flush_icache_range((unsigned long)state->buffer, 640 (unsigned long)buffer); 641 642 /* Indicate enabled */ 643 state->is_enabled = is_single_step; 644 regs->pc = (unsigned long)state->buffer; 645 646 /* Fault immediately if we are coming back from a syscall. */ 647 if (regs->faultnum == INT_SWINT_1) 648 regs->pc += 8; 649} 650 651#endif /* !__tilegx__ */ 652