/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.  All Rights Reserved.
 */

#include <linux/module.h>
#include <asm/sn/nodepda.h>
#include <asm/sn/addrs.h>
#include <asm/sn/arch.h>
#include <asm/sn/sn_cpuid.h>
#include <asm/sn/pda.h>
#include <asm/sn/shubio.h>
#include <asm/nodedata.h>
#include <asm/delay.h>

#include <linux/bootmem.h>
#include <linux/string.h>
#include <linux/sched.h>

#include <asm/sn/bte.h>

#ifndef L1_CACHE_MASK
#define L1_CACHE_MASK (L1_CACHE_BYTES - 1)
#endif

/* two interfaces on two btes */
#define MAX_INTERFACES_TO_TRY		4
#define MAX_NODES_TO_TRY		2

static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface)
{
	nodepda_t *tmp_nodepda;

	if (nasid_to_cnodeid(nasid) == -1)
		return (struct bteinfo_s *)NULL;

	tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid));
	return &tmp_nodepda->bte_if[interface];
}

static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode)
{
	if (is_shub2()) {
		BTE_CTRL_STORE(bte, (IBLS_BUSY | ((len) | (mode) << 24)));
	} else {
		BTE_LNSTAT_STORE(bte, len);
		BTE_CTRL_STORE(bte, mode);
	}
}

/************************************************************************
 * Block Transfer Engine copy related functions.
 *
 ***********************************************************************/

/*
 * bte_copy(src, dest, len, mode, notification)
 *
 * Use the block transfer engine to move kernel memory from src to dest
 * using the assigned mode.
 *
 * Parameters:
 *   src - physical address of the transfer source.
 *   dest - physical address of the transfer destination.
 *   len - number of bytes to transfer from source to dest.
 *   mode - hardware defined.  See reference information
 *          for IBCT0/1 in the SHUB Programmers Reference
 *   notification - kernel virtual address of the notification cache
 *                  line.  If NULL, the default is used and
 *                  the bte_copy is synchronous.
 *
 * NOTE:  This function requires src, dest, and len to
 *        be cacheline aligned.
 */
bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
{
	u64 transfer_size;
	u64 transfer_stat;
	u64 notif_phys_addr;
	struct bteinfo_s *bte;
	bte_result_t bte_status;
	unsigned long irq_flags;
	unsigned long itc_end = 0;
	int nasid_to_try[MAX_NODES_TO_TRY];
	int my_nasid = cpuid_to_nasid(raw_smp_processor_id());
	int bte_if_index, nasid_index;
	int bte_first, btes_per_node = BTES_PER_NODE;

	BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n",
		    src, dest, len, mode, notification));

	if (len == 0) {
		return BTE_SUCCESS;
	}

	BUG_ON((len & L1_CACHE_MASK) ||
	       (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK));
	BUG_ON(!(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT)));

	/*
	 * Start with interface corresponding to cpu number
	 */
	bte_first = raw_smp_processor_id() % btes_per_node;

	if (mode & BTE_USE_DEST) {
		/* try remote then local */
		nasid_to_try[0] = NASID_GET(dest);
		if (mode & BTE_USE_ANY) {
			nasid_to_try[1] = my_nasid;
		} else {
			nasid_to_try[1] = (int)NULL;
		}
	} else {
		/* try local then remote */
		nasid_to_try[0] = my_nasid;
		if (mode & BTE_USE_ANY) {
			nasid_to_try[1] = NASID_GET(dest);
		} else {
			nasid_to_try[1] = (int)NULL;
		}
	}

retry_bteop:
	do {
		local_irq_save(irq_flags);

		bte_if_index = bte_first;
		nasid_index = 0;

		/* Attempt to lock one of the BTE interfaces. */
		while (nasid_index < MAX_NODES_TO_TRY) {
			bte = bte_if_on_node(nasid_to_try[nasid_index], bte_if_index);

			if (bte == NULL) {
				nasid_index++;
				continue;
			}

			if (spin_trylock(&bte->spinlock)) {
				if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) ||
				    (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) {
					/* Got the lock but BTE still busy */
					spin_unlock(&bte->spinlock);
				} else {
					/* we got the lock and it's not busy */
					break;
				}
			}

			bte_if_index = (bte_if_index + 1) % btes_per_node; /* Next interface */
			if (bte_if_index == bte_first) {
				/*
				 * We've tried all interfaces on this node
				 */
				nasid_index++;
			}

			bte = NULL;
		}

		if (bte != NULL) {
			break;
		}

		local_irq_restore(irq_flags);

		if (!(mode & BTE_WACQUIRE)) {
			return BTEFAIL_NOTAVAIL;
		}
	} while (1);

	if (notification == NULL) {
		/* User does not want to be notified. */
		bte->most_rcnt_na = &bte->notify;
	} else {
		bte->most_rcnt_na = notification;
	}

	/* Calculate the number of cache lines to transfer. */
	transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK);
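
	/*
	 * Illustrative note, assuming 128-byte cache lines
	 * (L1_CACHE_SHIFT == 7): a 0x2000-byte request programs 0x40
	 * cache lines here.  The BUG_ON checks above already guarantee
	 * len is a cacheline multiple below (BTE_LEN_MASK + 1) lines,
	 * so the mask cannot truncate the count.
	 */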

	/* Initialize the notification to a known value. */
	*bte->most_rcnt_na = BTE_WORD_BUSY;
	notif_phys_addr = (u64)bte->most_rcnt_na;

	/* Set the source and destination registers */
	BTE_PRINTKV(("IBSA = 0x%lx)\n", src));
	BTE_SRC_STORE(bte, src);
	BTE_PRINTKV(("IBDA = 0x%lx)\n", dest));
	BTE_DEST_STORE(bte, dest);

	/* Set the notification register */
	BTE_PRINTKV(("IBNA = 0x%lx)\n", notif_phys_addr));
	BTE_NOTIF_STORE(bte, notif_phys_addr);

	/* Initiate the transfer */
	BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode)));
	bte_start_transfer(bte, transfer_size, BTE_VALID_MODE(mode));

	itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec);

	spin_unlock_irqrestore(&bte->spinlock, irq_flags);

	if (notification != NULL) {
		return BTE_SUCCESS;
	}

	while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) {
		cpu_relax();
		if (ia64_get_itc() > itc_end) {
			BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n",
				    NASID_GET(bte->bte_base_addr), bte->bte_num,
				    BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));
			bte->bte_error_count++;
			bte->bh_error = IBLS_ERROR;
			bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode));
			*bte->most_rcnt_na = BTE_WORD_AVAILABLE;
			goto retry_bteop;
		}
	}

	BTE_PRINTKV((" Delay Done.  IBLS = 0x%lx, most_rcnt_na = 0x%lx\n",
		     BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));

	if (transfer_stat & IBLS_ERROR) {
		bte_status = transfer_stat & ~IBLS_ERROR;
	} else {
		bte_status = BTE_SUCCESS;
	}
	*bte->most_rcnt_na = BTE_WORD_AVAILABLE;

	BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n",
		    BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));

	return bte_status;
}

EXPORT_SYMBOL(bte_copy);

/*
 * bte_unaligned_copy(src, dest, len, mode)
 *
 * Use the block transfer engine to move kernel
 * memory from src to dest using the assigned mode.
 *
 * Parameters:
 *   src - physical address of the transfer source.
 *   dest - physical address of the transfer destination.
 *   len - number of bytes to transfer from source to dest.
 *   mode - hardware defined.  See reference information
 *          for IBCT0/1 in the SGI documentation.
 *
 * NOTE: If the source, dest, and len are all cache line aligned,
 * then it would be _FAR_ preferable to use bte_copy instead.
 */
bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
{
	int destFirstCacheOffset;
	u64 headBteSource;
	u64 headBteLen;
	u64 headBcopySrcOffset;
	u64 headBcopyDest;
	u64 headBcopyLen;
	u64 footBteSource;
	u64 footBteLen;
	u64 footBcopyDest;
	u64 footBcopyLen;
	bte_result_t rv;
	char *bteBlock, *bteBlock_unaligned;

	if (len == 0) {
		return BTE_SUCCESS;
	}

	/* temporary buffer used during unaligned transfers */
	bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES, GFP_KERNEL);
	if (bteBlock_unaligned == NULL) {
		return BTEFAIL_NOTAVAIL;
	}
	bteBlock = (char *)L1_CACHE_ALIGN((u64) bteBlock_unaligned);

	headBcopySrcOffset = src & L1_CACHE_MASK;
	destFirstCacheOffset = dest & L1_CACHE_MASK;

	/*
	 * At this point, the transfer is broken into
	 * (up to) three sections.  The first section is
	 * from the start address to the first physical
	 * cache line, the second is from the first physical
	 * cache line to the last complete cache line,
	 * and the third is from the last cache line to the
	 * end of the buffer.  The first and third sections
	 * are handled by bte copying into a temporary buffer
	 * and then bcopy'ing the necessary section into the
	 * final location.  The middle section is handled with
	 * a standard bte copy.
	 *
	 * One nasty exception to the above rule is when the
	 * source and destination are not symmetrically
	 * mis-aligned.  If the source offset from the first
	 * cache line is different from the destination offset,
	 * we make the first section be the entire transfer
	 * and then bcopy the entire block into place.
	 */
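	/*
	 * Worked example (illustrative, assuming 128-byte cache lines):
	 * if src and dest are both 0x40 bytes past a cache line boundary
	 * and len is 0x200, the head section covers the first 0x40 bytes
	 * via the temporary buffer (bteBlock), the middle section is a
	 * direct 0x180-byte (three-line) BTE copy, and the foot section
	 * covers the final 0x40 bytes via the temporary buffer again.
	 */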
	if (headBcopySrcOffset == destFirstCacheOffset) {

		/*
		 * Both the source and destination are the same
		 * distance from a cache line boundary so we can
		 * use the bte to transfer the bulk of the
		 * data.
		 */
		headBteSource = src & ~L1_CACHE_MASK;
		headBcopyDest = dest;
		if (headBcopySrcOffset) {
			headBcopyLen =
			    (len > (L1_CACHE_BYTES - headBcopySrcOffset)
			     ? L1_CACHE_BYTES - headBcopySrcOffset : len);
			headBteLen = L1_CACHE_BYTES;
		} else {
			headBcopyLen = 0;
			headBteLen = 0;
		}

		if (len > headBcopyLen) {
			footBcopyLen = (len - headBcopyLen) & L1_CACHE_MASK;
			footBteLen = L1_CACHE_BYTES;

			footBteSource = src + len - footBcopyLen;
			footBcopyDest = dest + len - footBcopyLen;

			if (footBcopyDest == (headBcopyDest + headBcopyLen)) {
				/*
				 * We have two contiguous bcopy
				 * blocks.  Merge them.
				 */
				headBcopyLen += footBcopyLen;
				headBteLen += footBteLen;
			} else if (footBcopyLen > 0) {
				rv = bte_copy(footBteSource,
					      ia64_tpa((unsigned long)bteBlock),
					      footBteLen, mode, NULL);
				if (rv != BTE_SUCCESS) {
					kfree(bteBlock_unaligned);
					return rv;
				}

				memcpy(__va(footBcopyDest),
				       (char *)bteBlock, footBcopyLen);
			}
		} else {
			footBcopyLen = 0;
			footBteLen = 0;
		}

		if (len > (headBcopyLen + footBcopyLen)) {
			/* now transfer the middle. */
			rv = bte_copy((src + headBcopyLen),
				      (dest + headBcopyLen),
				      (len - headBcopyLen - footBcopyLen),
				      mode, NULL);
			if (rv != BTE_SUCCESS) {
				kfree(bteBlock_unaligned);
				return rv;
			}
		}
	} else {

		/*
		 * The transfer is not symmetric, so we will
		 * allocate a buffer large enough for all the
		 * data, bte_copy into that buffer and then
		 * bcopy to the destination.
		 */

		headBcopySrcOffset = src & L1_CACHE_MASK;
		headBcopyDest = dest;
		headBcopyLen = len;

		headBteSource = src - headBcopySrcOffset;
		/* Add the leading and trailing bytes from source */
		headBteLen = L1_CACHE_ALIGN(len + headBcopySrcOffset);
	}

	if (headBcopyLen > 0) {
		rv = bte_copy(headBteSource,
			      ia64_tpa((unsigned long)bteBlock), headBteLen,
			      mode, NULL);
		if (rv != BTE_SUCCESS) {
			kfree(bteBlock_unaligned);
			return rv;
		}

		memcpy(__va(headBcopyDest), ((char *)bteBlock +
					     headBcopySrcOffset), headBcopyLen);
	}
	kfree(bteBlock_unaligned);
	return BTE_SUCCESS;
}

EXPORT_SYMBOL(bte_unaligned_copy);

/************************************************************************
 * Block Transfer Engine initialization functions.
 *
 ***********************************************************************/

/*
 * bte_init_node(nodepda, cnode)
 *
 * Initialize the nodepda structure with BTE base addresses and
 * spinlocks.
 */
void bte_init_node(nodepda_t *mynodepda, cnodeid_t cnode)
{
	int i;

	/*
	 * Indicate that all the block transfer engines on this node
	 * are available.
	 */

	/*
	 * Allocate one bte_recover_t structure per node.  It holds
	 * the recovery lock for the node.  All the bte interface
	 * structures will point at this one bte_recover structure
	 * to get the lock.
	 */
	spin_lock_init(&mynodepda->bte_recovery_lock);
	init_timer(&mynodepda->bte_recovery_timer);
	mynodepda->bte_recovery_timer.function = bte_error_handler;
	mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda;

	for (i = 0; i < BTES_PER_NODE; i++) {
		u64 *base_addr;

		/* Which link status register should we use? */
		base_addr = (u64 *)
		    REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), BTE_BASE_ADDR(i));
		mynodepda->bte_if[i].bte_base_addr = base_addr;
		mynodepda->bte_if[i].bte_source_addr = BTE_SOURCE_ADDR(base_addr);
		mynodepda->bte_if[i].bte_destination_addr = BTE_DEST_ADDR(base_addr);
		mynodepda->bte_if[i].bte_control_addr = BTE_CTRL_ADDR(base_addr);
		mynodepda->bte_if[i].bte_notify_addr = BTE_NOTIF_ADDR(base_addr);

		/*
		 * Initialize the notification and spinlock
		 * so the first transfer can occur.
		 */
		mynodepda->bte_if[i].most_rcnt_na =
		    &(mynodepda->bte_if[i].notify);
		mynodepda->bte_if[i].notify = BTE_WORD_AVAILABLE;
		spin_lock_init(&mynodepda->bte_if[i].spinlock);

		mynodepda->bte_if[i].bte_cnode = cnode;
		mynodepda->bte_if[i].bte_error_count = 0;
		mynodepda->bte_if[i].bte_num = i;
		mynodepda->bte_if[i].cleanup_active = 0;
		mynodepda->bte_if[i].bh_error = 0;
	}
}
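
/*
 * Usage sketch (illustrative only): src_va, dst_va and nbytes below are
 * hypothetical caller variables, and BTE_NORMAL is assumed to be the
 * basic notify mode defined in <asm/sn/bte.h>.
 *
 *	u64 src_pa = ia64_tpa((u64)src_va);
 *	u64 dst_pa = ia64_tpa((u64)dst_va);
 *	bte_result_t rc;
 *
 *	if (!((src_pa | dst_pa | nbytes) & L1_CACHE_MASK))
 *		rc = bte_copy(src_pa, dst_pa, nbytes,
 *			      BTE_NORMAL | BTE_WACQUIRE, NULL);
 *	else
 *		rc = bte_unaligned_copy(src_pa, dst_pa, nbytes,
 *					BTE_NORMAL | BTE_WACQUIRE);
 *
 * A NULL notification pointer makes bte_copy() synchronous, and the
 * BTE_WACQUIRE mode bit makes it wait for a free interface instead of
 * returning BTEFAIL_NOTAVAIL.
 */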