/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
#include <debug.h>
#include <ppc/asm.h>
#include <ppc/proc_reg.h>
#include <mach/ppc/vm_param.h>
#include <assym.s>
#include <sys/errno.h>

#define INSTRUMENT 0

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * void pmap_zero_page(vm_offset_t pa)
 *
 * Zero a page of physical memory.  This routine runs in 32 or 64-bit mode,
 * and handles 32 and 128-byte cache lines.
 */

        .align  5
        .globl  EXT(pmap_zero_page)

LEXT(pmap_zero_page)

        mflr    r12                             // save return address
        bl      EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
        mtlr    r12                             // restore return address
        andi.   r9,r10,pf32Byte+pf128Byte       // r9 <- cache line size

        subfic  r4,r9,PPC_PGBYTES               // r4 <- starting offset in page

        bt++    pf64Bitb,page0S4                // Go do the big guys...

        slwi    r3,r3,12                        // get page address from page num
        b       page_zero_1                     // Jump to line aligned loop...

        .align  5

        nop
        nop
        nop
        nop
        nop
        nop
        nop

page0S4:
        sldi    r3,r3,12                        // get page address from page num

page_zero_1:                                    // loop zeroing cache lines
        sub.    r5,r4,r9                        // more to go?
        dcbz128 r3,r4                           // zero either 32 or 128 bytes
        sub     r4,r5,r9                        // generate next offset
        dcbz128 r3,r5
        bne--   page_zero_1

        b       EXT(ml_restore)                 // restore MSR and do the isync
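
// Illustrative only -- a rough C model of the zeroing loop above, not part of
// this file's build.  memset() stands in for dcbz/dcbz128; the real point of
// dcbz is that it establishes a zeroed cache line without reading memory,
// which plain stores cannot express in C:
/*
	#include <string.h>
	#include <stddef.h>

	static void pmap_zero_page_model(char *page, size_t pgbytes, size_t linesize)
	{
		// walk from the last cache line in the page down to offset 0,
		// zeroing one full line per store (the asm does two per pass)
		for (long off = (long)(pgbytes - linesize); off >= 0; off -= linesize)
			memset(page + off, 0, linesize);
	}
*/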

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/* void
 * phys_copy(src, dst, bytecount)
 *      addr64_t        src;
 *      addr64_t        dst;
 *      int             bytecount
 *
 * This routine will copy bytecount bytes from physical address src to physical
 * address dst.  It runs in 64-bit mode if necessary, but does not handle
 * overlap or make any attempt to be optimal.  Length must be a signed word.
 * Not performance critical.
 */

        .align  5
        .globl  EXT(phys_copy)

LEXT(phys_copy)

        rlwinm  r3,r3,0,1,0                     ; Duplicate high half of long long paddr into top of reg
        mflr    r12                             // get return address
        rlwimi  r3,r4,0,0,31                    ; Combine bottom of long long to full 64-bits
        rlwinm  r4,r5,0,1,0                     ; Duplicate high half of long long paddr into top of reg
        bl      EXT(ml_set_physical_disabled)   // turn DR and EE off, SF on, get features in r10
        rlwimi  r4,r6,0,0,31                    ; Combine bottom of long long to full 64-bits
        mtlr    r12                             // restore return address
        subic.  r5,r7,4                         // a word to copy?
        b       phys_copy_2

        .align  5

phys_copy_1:                                    // loop copying words
        subic.  r5,r5,4                         // more to go?
        lwz     r0,0(r3)
        addi    r3,r3,4
        stw     r0,0(r4)
        addi    r4,r4,4
phys_copy_2:
        bge     phys_copy_1
        addic.  r5,r5,4                         // restore count
        ble     phys_copy_4                     // no more

        // Loop is aligned here

phys_copy_3:                                    // loop copying bytes
        subic.  r5,r5,1                         // more to go?
        lbz     r0,0(r3)
        addi    r3,r3,1
        stb     r0,0(r4)
        addi    r4,r4,1
        bgt     phys_copy_3
phys_copy_4:
        b       EXT(ml_restore)                 // restore MSR and do the isync
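
// Illustrative only -- the copy structure above (word loop, then a byte loop
// for the tail) as a C sketch; it ignores the physical-mode/MSR handling:
/*
	#include <string.h>

	static void phys_copy_model(const unsigned char *src, unsigned char *dst, int n)
	{
		while (n >= 4) {                // word loop
			memcpy(dst, src, 4);
			src += 4;  dst += 4;  n -= 4;
		}
		while (n-- > 0)                 // byte loop for leftovers
			*dst++ = *src++;
	}
*/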

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/* void
 * pmap_copy_page(src, dst)
 *      ppnum_t         src;
 *      ppnum_t         dst;
 *
 * This routine will copy the physical page src to physical page dst
 *
 * This routine assumes that src and dst are page numbers and that the
 * destination is cached.  It runs on 32 and 64-bit processors, with and
 * without altivec, and with 32 and 128-byte cache lines.
 * We must also assume that no one will be executing within the destination
 * page, and that this will be used for paging.  Because this
 * is a common routine, we have tuned loops for each processor class.
 *
 */
#define kSFSize (FM_SIZE+160)

ENTRY(pmap_copy_page, TAG_NO_FRAME_USED)

        lis     r2,hi16(MASK(MSR_VEC))          ; Get the vector flag
        mflr    r0                              // get return
        ori     r2,r2,lo16(MASK(MSR_FP))        ; Add the FP flag
        stw     r0,8(r1)                        // save
        stwu    r1,-kSFSize(r1)                 // set up a stack frame for VRs or FPRs
        mfmsr   r11                             // save MSR at entry
        mfsprg  r10,2                           // get feature flags
        andc    r11,r11,r2                      // Clear out vec and fp
        ori     r2,r2,lo16(MASK(MSR_EE))        // Get EE on also
        andc    r2,r11,r2                       // Clear out EE as well
        mtcrf   0x02,r10                        // we need to test pf64Bit
        ori     r2,r2,MASK(MSR_FP)              // must enable FP for G3...
        mtcrf   0x80,r10                        // we need to test pfAltivec too
        oris    r2,r2,hi16(MASK(MSR_VEC))       // enable altivec for G4 (ignored if G3)
        mtmsr   r2                              // turn EE off, FP and VEC on
        isync
        bt++    pf64Bitb,pmap_copy_64           // skip if 64-bit processor (only they take hint)
        slwi    r3,r3,12                        // get page address from page num
        slwi    r4,r4,12                        // get page address from page num
        rlwinm  r12,r2,0,MSR_DR_BIT+1,MSR_DR_BIT-1      // get ready to turn off DR
        bt      pfAltivecb,pmap_copy_g4         // altivec but not 64-bit means G4


        // G3 -- copy using FPRs

        stfd    f0,FM_SIZE+0(r1)                // save the 4 FPRs we use to copy
        stfd    f1,FM_SIZE+8(r1)
        li      r5,PPC_PGBYTES/32               // count of cache lines in a page
        stfd    f2,FM_SIZE+16(r1)
        mtctr   r5
        stfd    f3,FM_SIZE+24(r1)
        mtmsr   r12                             // turn off DR after saving FPRs on stack
        isync

pmap_g3_copy_loop:                              // loop over 32-byte cache lines
        dcbz    0,r4                            // avoid read of dest line
        lfd     f0,0(r3)
        lfd     f1,8(r3)
        lfd     f2,16(r3)
        lfd     f3,24(r3)
        addi    r3,r3,32
        stfd    f0,0(r4)
        stfd    f1,8(r4)
        stfd    f2,16(r4)
        stfd    f3,24(r4)
        dcbst   0,r4                            // flush dest line to RAM
        addi    r4,r4,32
        bdnz    pmap_g3_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r6,PPC_PGBYTES-32               // point to last line in page
pmap_g3_icache_flush:
        subic.  r5,r6,32                        // more to go?
        icbi    r4,r6                           // flush another line in icache
        subi    r6,r5,32                        // get offset to next line
        icbi    r4,r5
        bne     pmap_g3_icache_flush

        sync
        mtmsr   r2                              // turn DR back on
        isync
        lfd     f0,FM_SIZE+0(r1)                // restore the FPRs
        lfd     f1,FM_SIZE+8(r1)
        lfd     f2,FM_SIZE+16(r1)
        lfd     f3,FM_SIZE+24(r1)

        b       pmap_g4_restore                 // restore MSR and done


        // G4 -- copy using VRs

pmap_copy_g4:                                   // r2=(MSR-EE), r12=(r2-DR), r10=features, r11=old MSR
        la      r9,FM_SIZE+16(r1)               // r9 <- place where we save VRs
        li      r5,16                           // load x-form offsets into r5-r9
        li      r6,32                           // another offset
        stvx    v0,0,r9                         // save some VRs so we can use to copy
        li      r7,48                           // another offset
        stvx    v1,r5,r9
        li      r0,PPC_PGBYTES/64               // we loop over 64-byte chunks
        stvx    v2,r6,r9
        mtctr   r0
        li      r8,96                           // get look-ahead for touch
        stvx    v3,r7,r9
        li      r9,128
        mtmsr   r12                             // now we've saved VRs on stack, turn off DR
        isync                                   // wait for it to happen
        b       pmap_g4_copy_loop

        .align  5                               // align inner loops
pmap_g4_copy_loop:                              // loop over 64-byte chunks
        dcbt    r3,r8                           // touch 3 lines ahead
        nop                                     // avoid a 17-word loop...
        dcbt    r3,r9                           // touch 4 lines ahead
        nop                                     // more padding
        dcba    0,r4                            // avoid pre-fetch of 1st dest line
        lvx     v0,0,r3                         // offset 0
        lvx     v1,r5,r3                        // offset 16
        lvx     v2,r6,r3                        // offset 32
        lvx     v3,r7,r3                        // offset 48
        addi    r3,r3,64
        dcba    r6,r4                           // avoid pre-fetch of 2nd line
        stvx    v0,0,r4                         // offset 0
        stvx    v1,r5,r4                        // offset 16
        stvx    v2,r6,r4                        // offset 32
        stvx    v3,r7,r4                        // offset 48
        dcbf    0,r4                            // push line 1
        dcbf    r6,r4                           // and line 2
        addi    r4,r4,64
        bdnz    pmap_g4_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-32               // point to last line in page
pmap_g4_icache_flush:
        subic.  r9,r8,32                        // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,32                        // get offset to next line
        icbi    r4,r9
        bne     pmap_g4_icache_flush

        sync
        mtmsr   r2                              // turn DR back on
        isync
        la      r9,FM_SIZE+16(r1)               // get base of VR save area
        lvx     v0,0,r9                         // restore the VRs
        lvx     v1,r5,r9
        lvx     v2,r6,r9
        lvx     v3,r7,r9

pmap_g4_restore:                                // r11=MSR
        mtmsr   r11                             // turn EE on, VEC and FP off
        isync                                   // wait for it to happen
        addi    r1,r1,kSFSize                   // pop off our stack frame
        lwz     r0,8(r1)                        // restore return address
        mtlr    r0
        blr


        // 64-bit/128-byte processor: copy using VRs

pmap_copy_64:                                   // r10=features, r11=old MSR
        sldi    r3,r3,12                        // get page address from page num
        sldi    r4,r4,12                        // get page address from page num
        la      r9,FM_SIZE+16(r1)               // get base of VR save area
        li      r5,16                           // load x-form offsets into r5-r9
        li      r6,32                           // another offset
        bf      pfAltivecb,pmap_novmx_copy      // altivec suppressed...
        stvx    v0,0,r9                         // save 8 VRs so we can copy w/o bubbles
        stvx    v1,r5,r9
        li      r7,48                           // another offset
        li      r0,PPC_PGBYTES/128              // we loop over 128-byte chunks
        stvx    v2,r6,r9
        stvx    v3,r7,r9
        addi    r9,r9,64                        // advance base ptr so we can store another 4
        mtctr   r0
        li      r0,MASK(MSR_DR)                 // get DR bit
        stvx    v4,0,r9
        stvx    v5,r5,r9
        andc    r12,r2,r0                       // turn off DR bit
        li      r0,1                            // get a 1 to slam into SF
        stvx    v6,r6,r9
        stvx    v7,r7,r9
        rldimi  r12,r0,63,MSR_SF_BIT            // set SF bit (bit 0)
        li      r8,-128                         // offset so we can reach back one line
        mtmsrd  r12                             // now we've saved VRs, turn DR off and SF on
        isync                                   // wait for it to happen
        dcbt128 0,r3,1                          // start a forward stream
        b       pmap_64_copy_loop

        .align  5                               // align inner loops
pmap_64_copy_loop:                              // loop over 128-byte chunks
        dcbz128 0,r4                            // avoid read of destination line
        lvx     v0,0,r3                         // offset 0
        lvx     v1,r5,r3                        // offset 16
        lvx     v2,r6,r3                        // offset 32
        lvx     v3,r7,r3                        // offset 48
        addi    r3,r3,64                        // don't have enough GPRs so add 64 2x
        lvx     v4,0,r3                         // offset 64
        lvx     v5,r5,r3                        // offset 80
        lvx     v6,r6,r3                        // offset 96
        lvx     v7,r7,r3                        // offset 112
        addi    r3,r3,64
        stvx    v0,0,r4                         // offset 0
        stvx    v1,r5,r4                        // offset 16
        stvx    v2,r6,r4                        // offset 32
        stvx    v3,r7,r4                        // offset 48
        addi    r4,r4,64
        stvx    v4,0,r4                         // offset 64
        stvx    v5,r5,r4                        // offset 80
        stvx    v6,r6,r4                        // offset 96
        stvx    v7,r7,r4                        // offset 112
        addi    r4,r4,64
        dcbf    r8,r4                           // flush the line we just wrote
        bdnz    pmap_64_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-128              // point to last line in page
pmap_64_icache_flush:
        subic.  r9,r8,128                       // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,128                       // get offset to next line
        icbi    r4,r9
        bne     pmap_64_icache_flush

        sync
        mtmsrd  r2                              // turn DR back on, SF off
        isync
        la      r9,FM_SIZE+16(r1)               // get base address of VR save area on stack
        lvx     v0,0,r9                         // restore the VRs
        lvx     v1,r5,r9
        lvx     v2,r6,r9
        lvx     v3,r7,r9
        addi    r9,r9,64
        lvx     v4,0,r9
        lvx     v5,r5,r9
        lvx     v6,r6,r9
        lvx     v7,r7,r9

        b       pmap_g4_restore                 // restore lower half of MSR and return

        //
        // Copy on 64-bit without VMX
        //

pmap_novmx_copy:
        li      r0,PPC_PGBYTES/128              // we loop over 128-byte chunks
        mtctr   r0
        li      r0,MASK(MSR_DR)                 // get DR bit
        andc    r12,r2,r0                       // turn off DR bit
        li      r0,1                            // get a 1 to slam into SF
        rldimi  r12,r0,63,MSR_SF_BIT            // set SF bit (bit 0)
        mtmsrd  r12                             // turn DR off and SF on
        isync                                   // wait for it to happen
        dcbt128 0,r3,1                          // start a forward stream

pmap_novmx_copy_loop:                           // loop over 128-byte cache lines
        dcbz128 0,r4                            // avoid read of dest line

        ld      r0,0(r3)                        // Load half a line
        ld      r12,8(r3)
        ld      r5,16(r3)
        ld      r6,24(r3)
        ld      r7,32(r3)
        ld      r8,40(r3)
        ld      r9,48(r3)
        ld      r10,56(r3)

        std     r0,0(r4)                        // Store half a line
        std     r12,8(r4)
        std     r5,16(r4)
        std     r6,24(r4)
        std     r7,32(r4)
        std     r8,40(r4)
        std     r9,48(r4)
        std     r10,56(r4)

        ld      r0,64(r3)                       // Load half a line
        ld      r12,72(r3)
        ld      r5,80(r3)
        ld      r6,88(r3)
        ld      r7,96(r3)
        ld      r8,104(r3)
        ld      r9,112(r3)
        ld      r10,120(r3)

        addi    r3,r3,128

        std     r0,64(r4)                       // Store half a line
        std     r12,72(r4)
        std     r5,80(r4)
        std     r6,88(r4)
        std     r7,96(r4)
        std     r8,104(r4)
        std     r9,112(r4)
        std     r10,120(r4)

        dcbf    0,r4                            // flush the line we just wrote
        addi    r4,r4,128
        bdnz    pmap_novmx_copy_loop

        sync                                    // wait for stores to take
        subi    r4,r4,PPC_PGBYTES               // restore ptr to destination page
        li      r8,PPC_PGBYTES-128              // point to last line in page

pmap_novmx_icache_flush:
        subic.  r9,r8,128                       // more to go?
        icbi    r4,r8                           // flush from icache
        subi    r8,r9,128                       // get offset to next line
        icbi    r4,r9
        bne     pmap_novmx_icache_flush

        sync
        mtmsrd  r2                              // turn DR back on, SF off
        isync

        b       pmap_g4_restore                 // restore lower half of MSR and return



//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>

// Stack frame format used by copyin, copyout, copyinstr and copyoutstr.
// These routines all run both on 32 and 64-bit machines, though because they are called
// by the BSD kernel they are always in 32-bit mode when entered.  The mapped ptr returned
// by MapUserMemoryWindow will be 64 bits however on 64-bit machines.  Beware: avoid
// using compare instructions on this ptr.  This mapped ptr is kept globally in r31, so there
// is no need to store or load it, which are mode-dependent operations since it could be
// 32 or 64 bits.

#define kkFrameSize     (FM_SIZE+32)

#define kkBufSize       (FM_SIZE+0)
#define kkCR3           (FM_SIZE+4)
#define kkSource        (FM_SIZE+8)
#define kkDest          (FM_SIZE+12)
#define kkCountPtr      (FM_SIZE+16)
#define kkR31Save       (FM_SIZE+20)
#define kkThrErrJmp     (FM_SIZE+24)


// nonvolatile CR bits we use as flags in cr3

#define kk64bit         12
#define kkNull          13
#define kkIn            14
#define kkString        15
#define kkZero          15
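
// Illustrative only -- a C picture of the frame laid out by the kk* offsets
// above (this struct is not used by the assembly; FM_SIZE_MODEL is a
// placeholder for the real FM_SIZE linkage-area size from assym.s):
/*
	#include <stdint.h>

	#define FM_SIZE_MODEL 16                    // placeholder for FM_SIZE

	struct kkframe_model {
		uint8_t  fm_linkage[FM_SIZE_MODEL]; // standard frame header
		uint32_t kkBufSize;                 // FM_SIZE+0:  buffer length or count
		uint32_t kkCR3;                     // FM_SIZE+4:  caller's cr3
		uint32_t kkSource;                  // FM_SIZE+8:  source arg
		uint32_t kkDest;                    // FM_SIZE+12: dest arg
		uint32_t kkCountPtr;                // FM_SIZE+16: ptr for #bytes-moved result
		uint32_t kkR31Save;                 // FM_SIZE+20: caller's r31
		uint32_t kkThrErrJmp;               // FM_SIZE+24: saved thread recover ptr
		uint32_t kkPad;                     // FM_SIZE+28: pad out to kkFrameSize
	};
*/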

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyoutstr(src, dst, maxcount, count)
 *      vm_offset_t     src;        // r3
 *      addr64_t        dst;        // r4 and r5
 *      vm_size_t       maxcount;   // r6
 *      vm_size_t*      count;      // r7
 *
 * Set *count to the number of bytes copied.
 */

ENTRY(copyoutstr, TAG_NO_FRAME_USED)
        mfcr    r2,0x10                         // save caller's cr3, which we use for flags
        mr      r10,r4                          // move high word of 64-bit user address to r10
        li      r0,0
        crset   kkString                        // flag as a string op
        mr      r11,r5                          // move low word of 64-bit user address to r11
        stw     r0,0(r7)                        // initialize #bytes moved
        crclr   kkIn                            // flag as copyout
        b       copyJoin


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyinstr(src, dst, maxcount, count)
 *      addr64_t        src;        // r3 and r4
 *      vm_offset_t     dst;        // r5
 *      vm_size_t       maxcount;   // r6
 *      vm_size_t*      count;      // r7
 *
 * Set *count to the number of bytes copied.
 * If dst == NULL, don't copy, just count bytes.
 * Only currently called from klcopyinstr.
 */

ENTRY(copyinstr, TAG_NO_FRAME_USED)
        mfcr    r2,0x10                         // save caller's cr3, which we use for flags
        cmplwi  r5,0                            // dst==NULL?
        mr      r10,r3                          // move high word of 64-bit user address to r10
        li      r0,0
        crset   kkString                        // flag as a string op
        mr      r11,r4                          // move low word of 64-bit user address to r11
        crmove  kkNull,cr0_eq                   // remember if (dst==NULL)
        stw     r0,0(r7)                        // initialize #bytes moved
        crset   kkIn                            // flag as copyin (rather than copyout)
        b       copyJoin1                       // skip over the "crclr kkNull"


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyout(src, dst, count)
 *      vm_offset_t     src;        // r3
 *      addr64_t        dst;        // r4 and r5
 *      size_t          count;      // r6
 */

        .align  5
        .globl  EXT(copyout)
        .globl  EXT(copyoutmsg)

LEXT(copyout)
LEXT(copyoutmsg)

#if INSTRUMENT
        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[12] - Take stamp at copyout
        stw     r12,0x6100+(12*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(12*16)+0xC(0)       ; INSTRUMENT - Save it
#endif
        mfcr    r2,0x10                         // save caller's cr3, which we use for flags
        mr      r10,r4                          // move high word of 64-bit user address to r10
        crclr   kkString                        // not a string version
        mr      r11,r5                          // move low word of 64-bit user address to r11
        crclr   kkIn                            // flag as copyout
        b       copyJoin


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copyin(src, dst, count)
 *      addr64_t        src;        // r3 and r4
 *      vm_offset_t     dst;        // r5
 *      size_t          count;      // r6
 */


        .align  5
        .globl  EXT(copyin)
        .globl  EXT(copyinmsg)

LEXT(copyin)
LEXT(copyinmsg)

        mfcr    r2,0x10                         // save caller's cr3, which we use for flags
        mr      r10,r3                          // move high word of 64-bit user address to r10
        crclr   kkString                        // not a string version
        mr      r11,r4                          // move low word of 64-bit user address to r11
        crset   kkIn                            // flag as copyin


// Common code to handle setup for all the copy variants:
//      r2 = caller's cr3
//      r3 = source if copyout
//      r5 = dest if copyin
//      r6 = buffer length or count
//      r7 = count output ptr (if kkString set)
//      r10 = high word of 64-bit user-space address (source if copyin, dest if copyout)
//      r11 = low word of 64-bit user-space address
//      cr3 = kkIn, kkString, kkNull flags

copyJoin:
        crclr   kkNull                          // (dst==NULL) convention not used with this call
copyJoin1:                                      // enter from copyinstr with kkNull set
        mflr    r0                              // get return address
        cmplwi  r6,0                            // buffer length 0?
        lis     r9,0x1000                       // r9 <- 0x10000000 (256MB)
        stw     r0,FM_LR_SAVE(r1)               // save return
        cmplw   cr1,r6,r9                       // buffer length > 256MB ?
        mfsprg  r8,2                            // get the features
        beq--   copyinout_0                     // 0 length is degenerate case
        stwu    r1,-kkFrameSize(r1)             // set up stack frame
        stw     r2,kkCR3(r1)                    // save caller's cr3, which we use for flags
        mtcrf   0x02,r8                         // move pf64Bit to cr6
        stw     r3,kkSource(r1)                 // save args across MapUserMemoryWindow
        stw     r5,kkDest(r1)
        stw     r6,kkBufSize(r1)
        crmove  kk64bit,pf64Bitb                // remember if this is a 64-bit processor
        stw     r7,kkCountPtr(r1)
        stw     r31,kkR31Save(r1)               // we use r31 globally for mapped user ptr



// Handle buffer length > 256MB.  This is an error (ENAMETOOLONG) on copyin and copyout.
// The string ops are passed -1 lengths by some BSD callers, so for them we silently clamp
// the buffer length to 256MB.  This isn't an issue if the string is less than 256MB
// (as most are!), but if they are >256MB we eventually return ENAMETOOLONG.  This restriction
// is due to MapUserMemoryWindow; we don't want to consume more than two segments for
// the mapping.  (A C model of this length policy appears below.)

        ble++   cr1,copyin0                     // skip if buffer length <= 256MB
        bf      kkString,copyinout_too_big      // error if not string op
        mr      r6,r9                           // silently clamp buffer length to 256MB
        stw     r9,kkBufSize(r1)                // update saved copy too


// Set up thread_recover in case we hit an illegal address.

copyin0:
        li      r31,0                           // no mapped ptr yet
        mfsprg  r8,1                            // Get the current thread
        lis     r2,hi16(copyinout_error)
        ori     r2,r2,lo16(copyinout_error)
        lwz     r4,THREAD_RECOVER(r8)
        lwz     r3,ACT_VMMAP(r8)                // r3 <- vm_map virtual address
        stw     r2,THREAD_RECOVER(r8)
        stw     r4,kkThrErrJmp(r1)
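
// Illustrative only -- the length policy above as a C sketch (function and
// macro names are hypothetical; MAX_WINDOW is the 256MB MapUserMemoryWindow
// limit discussed above):
/*
	#include <sys/errno.h>

	#define MAX_WINDOW (256u << 20)         // 0x10000000: 256MB

	static int check_and_clamp_len(unsigned int *len, int is_string_op)
	{
		if (*len > MAX_WINDOW) {
			if (!is_string_op)
				return ENAMETOOLONG;    // copyin/copyout: reject outright
			*len = MAX_WINDOW;              // string ops: clamp silently; if the
							// string really is longer, we return
							// ENAMETOOLONG later
		}
		return 0;
	}
*/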

// Map user segment into kernel map, turn on 64-bit mode.  At this point:
//      r3 = vm map
//      r6 = buffer length
//      r10/r11 = 64-bit user-space ptr (source if copyin, dest if copyout)
//
// When we call MapUserMemoryWindow, we pass:
//      r3 = vm map ptr
//      r4/r5 = 64-bit user space address as an addr64_t

        mr      r4,r10                          // copy user ptr into r4/r5
        mr      r5,r11
#if INSTRUMENT
        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[13] - Take stamp before mapuseraddressspace
        stw     r12,0x6100+(13*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(13*16)+0xC(0)       ; INSTRUMENT - Save it
#endif
        bl      EXT(MapUserMemoryWindow)        // get r3/r4 <- 64-bit address in kernel map of user operand
#if INSTRUMENT
        mfspr   r12,pmc1                        ; INSTRUMENT - saveinstr[14] - Take stamp after mapuseraddressspace
        stw     r12,0x6100+(14*16)+0x0(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc2                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0x4(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc3                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0x8(0)       ; INSTRUMENT - Save it
        mfspr   r12,pmc4                        ; INSTRUMENT - Get stamp
        stw     r12,0x6100+(14*16)+0xC(0)       ; INSTRUMENT - Save it
#endif
        mr      r31,r4                          // r31 <- mapped ptr into user space (may be 64-bit)
        bf--    kk64bit,copyin1                 // skip if a 32-bit processor

        rldimi  r31,r3,32,0                     // slam high-order bits into mapped ptr
        mfmsr   r4                              // if 64-bit, turn on SF so we can use returned ptr
        li      r0,1
        rldimi  r4,r0,63,MSR_SF_BIT             // light bit 0
        mtmsrd  r4                              // turn on 64-bit mode
        isync                                   // wait for mode to change


// Load r3-r5, substituting mapped ptr as appropriate.

copyin1:
        lwz     r5,kkBufSize(r1)                // restore length to copy
        bf      kkIn,copyin2                    // skip if copyout
        lwz     r4,kkDest(r1)                   // copyin: dest is kernel ptr
        mr      r3,r31                          // source is mapped ptr
        b       copyin3
copyin2:                                        // handle copyout
        lwz     r3,kkSource(r1)                 // source is kernel buffer (r3 at entry)
        mr      r4,r31                          // dest is mapped ptr into user space


// Finally, all set up to copy:
//      r3 = source ptr (mapped if copyin)
//      r4 = dest ptr (mapped if copyout)
//      r5 = length
//      r31 = mapped ptr returned by MapUserMemoryWindow
//      cr3 = kkIn, kkString, kk64bit, and kkNull flags

copyin3:
        bt      kkString,copyString             // handle copyinstr and copyoutstr
        bl      EXT(bcopy)                      // copyin and copyout: let bcopy do the work
        li      r3,0                            // return success

// Main exit point for copyin, copyout, copyinstr, and copyoutstr.  Also reached
// from error recovery if we get a DSI accessing user space.  Clear recovery ptr,
// and pop off frame.
//      r3 = 0, EFAULT, or ENAMETOOLONG

copyinx:
        lwz     r2,kkCR3(r1)                    // get caller's cr3
        mfsprg  r6,1                            // Get the current thread
        bf--    kk64bit,copyinx1                // skip if 32-bit processor
        mfmsr   r12
        rldicl  r12,r12,0,MSR_SF_BIT+1          // if 64-bit processor, turn 64-bit mode off
        mtmsrd  r12                             // turn SF off
        isync                                   // wait for the mode to change
copyinx1:
        lwz     r0,FM_LR_SAVE+kkFrameSize(r1)   // get return address
        lwz     r31,kkR31Save(r1)               // restore caller's r31
        lwz     r4,kkThrErrJmp(r1)              // load saved thread recover
        addi    r1,r1,kkFrameSize               // pop off our stack frame
        mtlr    r0
        stw     r4,THREAD_RECOVER(r6)           // restore thread recover
        mtcrf   0x10,r2                         // restore cr3
        blr


/* We get here via the exception handler if an illegal
 * user memory reference was made.  This error handler is used by
 * copyin, copyout, copyinstr, and copyoutstr.  Registers are as
 * they were at point of fault, so for example cr3 flags are valid.
 */

copyinout_error:
        li      r3,EFAULT                       // return error
        b       copyinx

copyinout_0:                                    // degenerate case: 0-length copy
        mtcrf   0x10,r2                         // restore cr3
        li      r3,0                            // return success
        blr

copyinout_too_big:                              // degenerate case
        mtcrf   0x10,r2                         // restore cr3
        lwz     r1,0(r1)                        // pop off stack frame
        li      r3,ENAMETOOLONG
        blr


//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
// Handle copyinstr and copyoutstr.  At this point the stack frame is set up,
// the recovery ptr is set, the user's buffer is mapped, we're in 64-bit mode
// if necessary, and:
//      r3 = source ptr, mapped if copyinstr
//      r4 = dest ptr, mapped if copyoutstr
//      r5 = buffer length
//      r31 = mapped ptr returned by MapUserMemoryWindow
//      cr3 = kkIn, kkString, kkNull, and kk64bit flags
// We do word copies unless the buffer is very short, then use a byte copy loop
// for the leftovers if necessary.  The crossover at which the word loop becomes
// faster is about seven bytes, counting the zero.
//
// We must first word-align the source ptr, in order to avoid taking a spurious
// page fault.

copyString:
        cmplwi  cr1,r5,15                       // is buffer very short?
        mr      r12,r3                          // remember ptr to 1st source byte
        mtctr   r5                              // assuming short, set up loop count for bytes
        blt--   cr1,copyinstr8                  // too short for word loop
        rlwinm  r2,r3,0,0x3                     // get byte offset of 1st byte within word
        rlwinm  r9,r3,3,0x18                    // get bit offset of 1st byte within word
        li      r7,-1
        sub     r3,r3,r2                        // word-align source address
        add     r6,r5,r2                        // get length starting at byte 0 in word
        srw     r7,r7,r9                        // get mask for bytes in first word
        srwi    r0,r6,2                         // get #words in buffer
        lwz     r5,0(r3)                        // get aligned word with first source byte
        lis     r10,hi16(0xFEFEFEFF)            // load magic constants into r10 and r11
        lis     r11,hi16(0x80808080)
        mtctr   r0                              // set up word loop count
        addi    r3,r3,4                         // advance past the source word
        ori     r10,r10,lo16(0xFEFEFEFF)
        ori     r11,r11,lo16(0x80808080)
        orc     r8,r5,r7                        // map bytes preceding first source byte into 0xFF
        bt--    kkNull,copyinstr5enter          // enter loop that just counts

// Special case 1st word, which has been 0xFF filled on left.  Note that we use
// "and.", even though we execute both in 32 and 64-bit mode.  This is OK.

        slw     r5,r5,r9                        // left justify payload bytes
        add     r9,r10,r8                       // r9 = data + 0xFEFEFEFF
        andc    r7,r11,r8                       // r7 = ~data & 0x80808080
        subfic  r0,r2,4                         // get r0 <- #payload bytes in 1st word
        and.    r7,r9,r7                        // if r7==0, then all bytes in r8 are nonzero
        stw     r5,0(r4)                        // copy payload bytes to dest buffer
        add     r4,r4,r0                        // then point to next byte in dest buffer
        bdnzt   cr0_eq,copyinstr6               // use loop that copies if 0 not found

        b       copyinstr7                      // 0 found (buffer can't be full)


// Word loop(s).  They do a word-parallel search for 0s, using the following
// non-obvious but very efficient test:
//      y = data + 0xFEFEFEFF
//      z = ~data & 0x80808080
// If (y & z)==0, then all bytes in dataword are nonzero.  There are two copies
// of this loop, one that just counts and another that copies.
//      r3 = ptr to next word of source (word aligned)
//      r4 = ptr to next byte in buffer
//      r6 = original buffer length (adjusted to be word origin)
//      r10 = 0xFEFEFEFF
//      r11 = 0x80808080
//      r12 = ptr to 1st source byte (used to determine string length)

        .align  5                               // align inner loops for speed
copyinstr5:                                     // version that counts but does not copy
        lwz     r8,0(r3)                        // get next word of source
        addi    r3,r3,4                         // advance past it
copyinstr5enter:
        add     r9,r10,r8                       // r9 = data + 0xFEFEFEFF
        andc    r7,r11,r8                       // r7 = ~data & 0x80808080
        and.    r7,r9,r7                        // r7 = r9 & r7 ("." ok even in 64-bit mode)
        bdnzt   cr0_eq,copyinstr5               // if r7==0, then all bytes in r8 are nonzero

        b       copyinstr7

        .align  5                               // align inner loops for speed
copyinstr6:                                     // version that counts and copies
        lwz     r8,0(r3)                        // get next word of source
        addi    r3,r3,4                         // advance past it
        addi    r4,r4,4                         // increment dest ptr while we wait for data
        add     r9,r10,r8                       // r9 = data + 0xFEFEFEFF
        andc    r7,r11,r8                       // r7 = ~data & 0x80808080
        and.    r7,r9,r7                        // r7 = r9 & r7 ("." ok even in 64-bit mode)
        stw     r8,-4(r4)                       // pack all 4 bytes into buffer
        bdnzt   cr0_eq,copyinstr6               // if r7==0, then all bytes are nonzero


// Either 0 found or buffer filled.  The above algorithm has mapped nonzero bytes to 0
// and 0 bytes to 0x80, with one exception: 0x01 bytes preceding the first 0 are also
// mapped to 0x80.  We must mask out these false hits before searching for an 0x80 byte.
//      r3 = word aligned ptr to next word of source (ie, r8==mem(r3-4))
//      r6 = original buffer length (adjusted to be word origin)
//      r7 = computed vector of 0x00 and 0x80 bytes
//      r8 = original source word, coming from -4(r3), possibly padded with 0xFFs on left if 1st word
//      r12 = ptr to 1st source byte (used to determine string length)
//      cr0 = beq set iff 0 not found

copyinstr7:
        rlwinm  r2,r8,7,0,31                    // move 0x01 bits to 0x80 position
        rlwinm  r6,r6,0,0x3                     // mask down to partial byte count in last word
        andc    r7,r7,r2                        // turn off false hits from 0x0100 worst case
        crnot   kkZero,cr0_eq                   // 0 found iff cr0_eq is off
        srwi    r7,r7,8                         // we want to count the 0 as a byte xferred
        cmpwi   r6,0                            // any bytes left over in last word?
        cntlzw  r7,r7                           // now we can find the 0 byte (ie, the 0x80)
        subi    r3,r3,4                         // back up r3 to point to 1st byte in r8
        srwi    r7,r7,3                         // convert 8,16,24,32 to 1,2,3,4
        add     r3,r3,r7                        // now r3 points one past 0 byte, or at 1st byte not xferred
        bt++    kkZero,copyinstr10              // 0 found, so done

        beq     copyinstr10                     // r6==0, so buffer truly full
        mtctr   r6                              // 0 not found, loop over r6 bytes
        b       copyinstr8                      // enter byte loop for last 1-3 leftover bytes
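
// Illustrative only -- the word-parallel zero test above as runnable C.
// Adding 0xFEFEFEFF is the same as subtracting 0x01010101, so this is the
// classic "haszero" trick, including the cleanup done at copyinstr7 for the
// false hits from 0x01 bytes preceding the first 0.  Big-endian byte order
// (as on PowerPC) is assumed when converting bit position to byte index:
/*
	#include <stdint.h>

	static int first_zero_byte(uint32_t data)   // returns 0-3, or 4 if none
	{
		uint32_t y = data + 0xFEFEFEFF;     // i.e., data - 0x01010101
		uint32_t z = ~data & 0x80808080;
		uint32_t hits = y & z;              // 0x80 where a byte may be 0
		hits &= ~(data << 7);               // 0x01 bytes also light up:
						    // mask those false hits back off
		// find leftmost hit; each 0x80 sits at bit 31, 23, 15, or 7
		int n = 0;
		while (n < 4 && !(hits & (0x80000000u >> (8 * n))))
			n++;
		return n;
	}
*/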

// Byte loop.  This is used for very small buffers and for the odd bytes left over
// after searching and copying words at a time.
//      r3 = ptr to next byte of source
//      r4 = ptr to next dest byte
//      r12 = ptr to first byte of source
//      ctr = count of bytes to check

        .align  5                               // align inner loops for speed
copyinstr8:                                     // loop over bytes of source
        lbz     r0,0(r3)                        // get next byte of source
        addi    r3,r3,1
        addi    r4,r4,1                         // increment dest addr whether we store or not
        cmpwi   r0,0                            // is it the 0?
        bt--    kkNull,copyinstr9               // don't store if copyinstr with NULL ptr
        stb     r0,-1(r4)
copyinstr9:
        bdnzf   cr0_eq,copyinstr8               // loop if byte not 0 and more room in buffer

        crmove  kkZero,cr0_eq                   // remember if 0 found or buffer filled


// Buffer filled or 0 found.  Unwind and return.
//      r3 = ptr to 1st source byte not transferred
//      r12 = ptr to 1st source byte
//      r31 = mapped ptr returned by MapUserMemoryWindow
//      cr3 = kkZero set iff 0 found

copyinstr10:
        lwz     r9,kkCountPtr(r1)               // get ptr to place to store count of bytes moved
        sub     r2,r3,r12                       // compute #bytes copied (including the 0)
        li      r3,0                            // assume success return status
        stw     r2,0(r9)                        // store #bytes moved
        bt++    kkZero,copyinx                  // we did find the 0 so return 0
        li      r3,ENAMETOOLONG                 // buffer filled
        b       copyinx                         // join main exit routine
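
// Illustrative only -- the net semantics of the string path above, as a simple
// C reference model (no fault handling, no word-parallel search):
/*
	#include <stddef.h>
	#include <sys/errno.h>

	static int copystr_model(const char *src, char *dst, size_t maxcount,
	                         size_t *count)
	{
		size_t n;
		for (n = 0; n < maxcount; n++) {
			char c = src[n];
			if (dst != NULL)        // copyinstr may be asked just to count
				dst[n] = c;
			if (c == '\0') {        // terminator is copied and counted
				*count = n + 1;
				return 0;
			}
		}
		*count = n;                     // buffer filled before a 0 was seen
		return ENAMETOOLONG;
	}
*/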

//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
/*
 * int
 * copypv(src, dst, size, which)
 *      addr64_t        src;    // r3 and r4
 *      addr64_t        dst;    // r5 and r6
 *      size_t          size;   // r7
 *      int             which;  // r8
 *
 * Operand size bytes are copied from operand src into operand dst.  The source and
 * destination operand addresses are given as addr64_t, and may designate starting
 * locations in physical or virtual memory in any combination except where both are
 * virtual.  Virtual memory locations may be in either the kernel or the current thread's
 * address space.  Operand size may be up to 256MB.
 *
 * Operation is controlled by the 'which' operand, which offers these options:
 *      cppvPsrc        : source operand is (1) physical or (0) virtual
 *      cppvPsnk        : destination operand is (1) physical or (0) virtual
 *      cppvKmap        : virtual operand is in (1) kernel or (0) current thread
 *      cppvFsnk        : (1) flush destination before and after transfer
 *      cppvFsrc        : (1) flush source before and after transfer
 *      cppvNoModSnk    : (1) don't set destination operand's changed bit(s)
 *      cppvNoRefSrc    : (1) don't set source operand's referenced bit(s)
 *
 * Implementation is now split into this new 64-bit path and the old path, hw_copypv_32().
 * This section describes the operation of the new 64-bit path.
 *
 * The 64-bit path utilizes the more capacious 64-bit kernel address space to create a
 * window in the kernel address space into all of physical RAM plus the I/O hole.  Since
 * the window's mappings specify the proper access policies for the underlying memory,
 * the new path does not have to flush caches to avoid a cache paradox, so cppvFsnk
 * and cppvFsrc are ignored.  Physical operand addresses are relocated into the physical
 * memory window, and are accessed with data relocation on.  Virtual addresses are either
 * within the kernel, or are mapped into the kernel address space through the user memory
 * window.  Because accesses to a virtual operand are performed with data relocation on,
 * the new path does not have to translate the address, disable/enable interrupts, lock
 * the mapping, or update referenced and changed bits.
 *
 * The IBM 970 (a.k.a. G5) processor treats real-mode accesses as guarded, so there is
 * a substantial performance penalty for copypv operating in real mode.  Utilizing the
 * new 64-bit path, transfer performance increases >100% on the G5.
 *
 * The attentive reader may notice that mtmsrd ops are not followed by isync ops as
 * might be expected.  The 970 follows PowerPC architecture version 2.01, which defines
 * mtmsrd with L=0 as a context synchronizing op, so a following isync is no longer
 * required.
 *
 * To keep things exciting, we develop 64-bit values in non-volatiles, but we also need
 * to call 32-bit functions, which would lead to the high-order 32 bits of our values
 * getting clobbered unless we do something special.  So, we preserve our 64-bit non-volatiles
 * in our own stack frame across calls to 32-bit functions.
 *
 */

// Map operand which bits into non-volatile CR2 and CR3 bits.
#define whichAlign      ((3+1)*4)
#define whichMask       0x007F0000
#define pvPsnk          (cppvPsnkb - whichAlign)
#define pvPsrc          (cppvPsrcb - whichAlign)
#define pvFsnk          (cppvFsnkb - whichAlign)
#define pvFsrc          (cppvFsrcb - whichAlign)
#define pvNoModSnk      (cppvNoModSnkb - whichAlign)
#define pvNoRefSrc      (cppvNoRefSrcb - whichAlign)
#define pvKmap          (cppvKmapb - whichAlign)
#define pvNoCache       cr2_lt
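
// Illustrative only -- a hypothetical call sketching how the cppv* mask bits
// described above combine (the mask constants come from pmap.h; the exact
// values are not spelled out here):
/*
	// copy size bytes from a physical source into a kernel-virtual sink,
	// and skip updating the source pages' referenced bits:
	int rc = copypv(src_phys, dst_kva, size,
	                cppvPsrc | cppvKmap | cppvNoRefSrc);
	// rc is 0 on success, EINVAL for bad arguments (both operands virtual,
	// or size >= 256MB), or EFAULT if the copy faults
*/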
        .align  5
        .globl  EXT(copypv)

LEXT(copypv)
        mfsprg  r10,2                           // get feature flags
        mtcrf   0x02,r10                        // we need to test pf64Bit
        bt++    pf64Bitb,copypv_64              // skip if 64-bit processor (only they take hint)

        b       EXT(hw_copypv_32)               // carry on with 32-bit copypv

// Push a 32-bit ABI-compliant stack frame and preserve all non-volatiles that we'll clobber.
copypv_64:
        mfsprg  r9,1                            // get current thread
        stwu    r1,-(FM_ALIGN((31-26+11)*4)+FM_SIZE)(r1)
                                                // allocate stack frame and link it
        mflr    r0                              // get return address
        mfcr    r10                             // get cr2 and cr3
        lwz     r12,THREAD_RECOVER(r9)          // get error callback
        stw     r26,FM_ARG0+0x00(r1)            // save non-volatile r26
        stw     r27,FM_ARG0+0x04(r1)            // save non-volatile r27
        stw     r28,FM_ARG0+0x08(r1)            // save non-volatile r28
        stw     r29,FM_ARG0+0x0C(r1)            // save non-volatile r29
        stw     r30,FM_ARG0+0x10(r1)            // save non-volatile r30
        stw     r31,FM_ARG0+0x14(r1)            // save non-volatile r31
        stw     r12,FM_ARG0+0x20(r1)            // save error callback
        stw     r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
                                                // save return address
        stw     r10,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
                                                // save non-volatile cr2 and cr3

// Non-volatile register usage in this routine is:
//      r26: saved msr image
//      r27: current pmap_t / virtual source address
//      r28: destination virtual address
//      r29: source address
//      r30: destination address
//      r31: byte count to copy
//      cr2/3: parameter 'which' bits

        rlwinm  r8,r8,whichAlign,whichMask      // align and mask which bits
        mr      r31,r7                          // copy size to somewhere non-volatile
        mtcrf   0x20,r8                         // insert which bits into cr2 and cr3
        mtcrf   0x10,r8                         // insert which bits into cr2 and cr3
        rlwinm  r29,r3,0,1,0                    // form source address high-order bits
        rlwinm  r30,r5,0,1,0                    // form destination address high-order bits
        rlwimi  r29,r4,0,0,31                   // form source address low-order bits
        rlwimi  r30,r6,0,0,31                   // form destination address low-order bits
        crand   cr7_lt,pvPsnk,pvPsrc            // are both operand addresses physical?
        cntlzw  r0,r31                          // count leading zeroes in byte count
        cror    cr7_eq,pvPsnk,pvPsrc            // cr7_eq <- source or destination is physical
        bf--    cr7_eq,copypv_einval            // both operands may not be virtual
        cmplwi  r0,4                            // byte count greater than or equal 256M (2**28)?
        blt--   copypv_einval                   // byte count too big, give EINVAL
        cmplwi  r31,0                           // byte count zero?
        beq--   copypv_zero                     // early out
        bt      cr7_lt,copypv_phys              // both operand addresses are physical
        mr      r28,r30                         // assume destination is virtual
        bf      pvPsnk,copypv_dv                // is destination virtual?
        mr      r28,r29                         // no, so source must be virtual
copypv_dv:
        lis     r27,ha16(EXT(kernel_pmap))      // get kernel's pmap_t *, high-order
        lwz     r27,lo16(EXT(kernel_pmap))(r27) // get kernel's pmap_t
        bt      pvKmap,copypv_kern              // virtual address in kernel map?
        lwz     r3,ACT_VMMAP(r9)                // get user's vm_map *
        rldicl  r4,r28,32,32                    // r4, r5 <- addr64_t virtual address
        rldicl  r5,r28,0,32
        std     r29,FM_ARG0+0x30(r1)            // preserve 64-bit r29 across 32-bit call
        std     r30,FM_ARG0+0x38(r1)            // preserve 64-bit r30 across 32-bit call
        bl      EXT(MapUserMemoryWindow)        // map slice of user space into kernel space
        ld      r29,FM_ARG0+0x30(r1)            // restore 64-bit r29
        ld      r30,FM_ARG0+0x38(r1)            // restore 64-bit r30
        rlwinm  r28,r3,0,1,0                    // convert relocated addr64_t virtual address
        rlwimi  r28,r4,0,0,31                   // into a single 64-bit scalar
copypv_kern:

// Since we'll be accessing the virtual operand with data-relocation on, we won't need to
// update the referenced and changed bits manually after the copy.  So, force the appropriate
// flag bit on for the virtual operand.
        crorc   pvNoModSnk,pvNoModSnk,pvPsnk    // for virtual dest, let hardware do ref/chg bits
        crorc   pvNoRefSrc,pvNoRefSrc,pvPsrc    // for virtual source, let hardware do ref bit

// We'll be finding a mapping and looking at it, so we need to disable interrupts.
        lis     r0,hi16(MASK(MSR_VEC))          // get vector mask
        ori     r0,r0,lo16(MASK(MSR_FP))        // insert fp mask
        mfmsr   r26                             // save current msr
        andc    r26,r26,r0                      // turn off VEC and FP in saved copy
        ori     r0,r0,lo16(MASK(MSR_EE))        // add EE to our mask
        andc    r0,r26,r0                       // disable EE in our new msr image
        mtmsrd  r0                              // introduce new msr image

// We're now holding the virtual operand's pmap_t in r27 and its virtual address in r28.  We now
// try to find a mapping corresponding to this address in order to determine whether the address
// is cacheable.  If we don't find a mapping, we can safely assume that the operand is cacheable
// (a non-cacheable operand must be a block mapping, which will always exist); otherwise, we
// examine the mapping's caching-inhibited bit.
        mr      r3,r27                          // r3 <- pmap_t pmap
        rldicl  r4,r28,32,32                    // r4, r5 <- addr64_t va
        rldicl  r5,r28,0,32
        la      r6,FM_ARG0+0x18(r1)             // r6 <- addr64_t *nextva
        li      r7,1                            // r7 <- int full, search nested mappings
        std     r26,FM_ARG0+0x28(r1)            // preserve 64-bit r26 across 32-bit calls
        std     r28,FM_ARG0+0x30(r1)            // preserve 64-bit r28 across 32-bit calls
        std     r29,FM_ARG0+0x38(r1)            // preserve 64-bit r29 across 32-bit calls
        std     r30,FM_ARG0+0x40(r1)            // preserve 64-bit r30 across 32-bit calls
        bl      EXT(mapping_find)               // find mapping for virtual operand
        mr.     r3,r3                           // did we find it?
        beq     copypv_nomapping                // nope, so we'll assume it's cacheable
        lwz     r4,mpVAddr+4(r3)                // get low half of virtual addr for hw flags
        rlwinm. r4,r4,0,mpIb-32,mpIb-32         // caching-inhibited bit set?
        crnot   pvNoCache,cr0_eq                // if it is, use bcopy_nc
        bl      EXT(mapping_drop_busy)          // drop busy on the mapping
copypv_nomapping:
        ld      r26,FM_ARG0+0x28(r1)            // restore 64-bit r26
        ld      r28,FM_ARG0+0x30(r1)            // restore 64-bit r28
        ld      r29,FM_ARG0+0x38(r1)            // restore 64-bit r29
        ld      r30,FM_ARG0+0x40(r1)            // restore 64-bit r30
        mtmsrd  r26                             // restore msr to its previous state

// Set both the source and destination virtual addresses to the virtual operand's address --
// we'll overlay one of them with the physical operand's address.
        mr      r27,r28                         // make virtual operand BOTH source AND destination

// Now we're ready to relocate the physical operand address(es) into the physical memory window.
// Recall that we've mapped physical memory (including the I/O hole) into the kernel's address
// space somewhere at or over the 2**32 line.  If one or both of the operands are in the I/O hole,
// we'll set the pvNoCache flag, forcing use of non-caching bcopy_nc() to do the copy.
copypv_phys:
        ld      r6,lgPMWvaddr(0)                // get physical memory window virtual address
        bf      pvPsnk,copypv_dstvirt           // is destination address virtual?
        cntlzd  r4,r30                          // count leading zeros in destination address
        cmplwi  r4,32                           // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
        cror    pvNoCache,cr0_eq,pvNoCache      // use bcopy_nc for I/O hole locations
        add     r28,r30,r6                      // relocate physical destination into physical window
copypv_dstvirt:
        bf      pvPsrc,copypv_srcvirt           // is source address virtual?
        cntlzd  r4,r29                          // count leading zeros in source address
        cmplwi  r4,32                           // if it's 32, then it's in the I/O hole (2**31 to 2**32-1)
        cror    pvNoCache,cr0_eq,pvNoCache      // use bcopy_nc for I/O hole locations
        add     r27,r29,r6                      // relocate physical source into physical window
copypv_srcvirt:

// Once the copy is under way (bcopy or bcopy_nc), we will want to get control if anything
// funny happens during the copy.  So, we set a pointer to our error handler in the per-thread
// control block.
        mfsprg  r8,1                            // get current thread's stuff
        lis     r3,hi16(copypv_error)           // get our error callback's address, high
        ori     r3,r3,lo16(copypv_error)        // get our error callback's address, low
        stw     r3,THREAD_RECOVER(r8)           // set our error callback

// Since our physical operand(s) are relocated at or above the 2**32 line, we must enter
// 64-bit mode.
        li      r0,1                            // get a handy one bit
        mfmsr   r3                              // get current msr
        rldimi  r3,r0,63,MSR_SF_BIT             // set SF bit on in our msr copy
        mtmsrd  r3                              // enter 64-bit mode

// If requested, flush the data cache.  Note that we don't actually flush;
// the code is retained "just in case".
#if 0
        bf      pvFsrc,copypv_nfs               // do we flush the source?
        rldicl  r3,r27,32,32                    // r3, r4 <- addr64_t source virtual address
        rldicl  r4,r27,0,32
        mr      r5,r31                          // r5 <- count (in bytes)
        li      r6,0                            // r6 <- boolean phys (false, not physical)
        bl      EXT(flush_dcache)               // flush the source operand
copypv_nfs:
        bf      pvFsnk,copypv_nfdx              // do we flush the destination?
        rldicl  r3,r28,32,32                    // r3, r4 <- addr64_t destination virtual address
        rldicl  r4,r28,0,32
        mr      r5,r31                          // r5 <- count (in bytes)
        li      r6,0                            // r6 <- boolean phys (false, not physical)
        bl      EXT(flush_dcache)               // flush the destination operand
copypv_nfdx:
#endif
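
// Illustrative only -- the cntlzd test above in C.  A 64-bit physical address
// with exactly 32 leading zero bits lies in [2**31, 2**32), which is where the
// code treats the I/O hole as sitting, so such operands are copied uncached:
/*
	#include <stdint.h>

	static int in_io_hole_model(uint64_t pa)
	{
		return pa >= (1ULL << 31) && pa < (1ULL << 32);  // cntlzd(pa) == 32
	}
*/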
// Call bcopy or bcopy_nc to perform the copy.
        mr      r3,r27                          // r3 <- source virtual address
        mr      r4,r28                          // r4 <- destination virtual address
        mr      r5,r31                          // r5 <- bytes to copy
        bt      pvNoCache,copypv_nc             // take non-caching route
        bl      EXT(bcopy)                      // call bcopy to do the copying
        b       copypv_copydone
copypv_nc:
        bl      EXT(bcopy_nc)                   // call bcopy_nc to do the copying
copypv_copydone:

// If requested, flush the data cache.  Note that we don't actually flush;
// the code is retained "just in case".
#if 0
        bf      pvFsrc,copypv_nfsx              // do we flush the source?
        rldicl  r3,r27,32,32                    // r3, r4 <- addr64_t source virtual address
        rldicl  r4,r27,0,32
        mr      r5,r31                          // r5 <- count (in bytes)
        li      r6,0                            // r6 <- boolean phys (false, not physical)
        bl      EXT(flush_dcache)               // flush the source operand
copypv_nfsx:
        bf      pvFsnk,copypv_nfd               // do we flush the destination?
        rldicl  r3,r28,32,32                    // r3, r4 <- addr64_t destination virtual address
        rldicl  r4,r28,0,32
        mr      r5,r31                          // r5 <- count (in bytes)
        li      r6,0                            // r6 <- boolean phys (false, not physical)
        bl      EXT(flush_dcache)               // flush the destination operand
copypv_nfd:
#endif

// Leave 64-bit mode.
        mfmsr   r3                              // get current msr
        rldicl  r3,r3,0,MSR_SF_BIT+1            // clear SF bit in our copy
        mtmsrd  r3                              // leave 64-bit mode

// If requested, set ref/chg on source/dest physical operand(s).  It is possible that the copy
// is from/to a RAM disk situated outside of mapped physical RAM, so we check each page by calling
// mapping_phys_lookup() before we try to set its ref/chg bits; otherwise, we might panic.
// Note that this code is page-size sensitive, so it should probably be a part of our low-level
// code in hw_vm.s.  (A C model of the page-count arithmetic appears after this block.)
        bt      pvNoModSnk,copypv_nomod         // skip destination update if not requested
        std     r29,FM_ARG0+0x30(r1)            // preserve 64-bit r29 across 32-bit calls
        li      r26,1                           // r26 <- 4K-page count
        mr      r27,r31                         // r27 <- byte count
        rlwinm  r3,r30,0,20,31                  // does destination cross a page boundary?
        subfic  r3,r3,4096                      // r3 <- #bytes to end of first page
        cmplw   r3,r27                          // compare with total byte count
        blt     copypv_modnox                   // skip if not crossing case
        subf    r27,r3,r27                      // r27 <- byte count less initial fragment
        addi    r26,r26,1                       // increment page count
copypv_modnox:
        srdi    r3,r27,12                       // pages to update (not including crosser)
        add     r26,r26,r3                      // add in crosser
        srdi    r27,r30,12                      // r27 <- destination page number
copypv_modloop:
        mr      r3,r27                          // r3 <- destination page number
        la      r4,FM_ARG0+0x18(r1)             // r4 <- unsigned int *pindex
        bl      EXT(mapping_phys_lookup)        // see if page is really there
        mr.     r3,r3                           // is it?
        beq--   copypv_modend                   // nope, break out of modify loop
        mr      r3,r27                          // r3 <- destination page number
        bl      EXT(mapping_set_mod)            // set page changed status
        subi    r26,r26,1                       // decrement page count
        cmpwi   r26,0                           // done yet?
        bgt     copypv_modloop                  // nope, iterate
copypv_modend:
        ld      r29,FM_ARG0+0x30(r1)            // restore 64-bit r29
copypv_nomod:
        bt      pvNoRefSrc,copypv_done          // skip source update if not requested
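
// Illustrative only -- the intent of the fragment arithmetic above (and its
// twin for the source, below) is simply "how many 4K pages does this byte
// range touch?".  A direct C formulation of that intent:
/*
	#include <stdint.h>

	static unsigned pages_spanned_model(uint64_t addr, uint64_t count)
	{
		uint64_t first = addr >> 12;                // 4K page numbers
		uint64_t last  = (addr + count - 1) >> 12;  // count is nonzero here
							    // (zero counts exit early
							    // at copypv_zero)
		return (unsigned)(last - first + 1);
	}
*/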
copypv_debugref:
        li      r26,1                           // r26 <- 4K-page count
        mr      r27,r31                         // r27 <- byte count
        rlwinm  r3,r29,0,20,31                  // does source cross a page boundary?
        subfic  r3,r3,4096                      // r3 <- #bytes to end of first page
        cmplw   r3,r27                          // compare with total byte count
        blt     copypv_refnox                   // skip if not crossing case
        subf    r27,r3,r27                      // r27 <- byte count less initial fragment
        addi    r26,r26,1                       // increment page count
copypv_refnox:
        srdi    r3,r27,12                       // pages to update (not including crosser)
        add     r26,r26,r3                      // add in crosser
        srdi    r27,r29,12                      // r27 <- source page number
copypv_refloop:
        mr      r3,r27                          // r3 <- source page number
        la      r4,FM_ARG0+0x18(r1)             // r4 <- unsigned int *pindex
        bl      EXT(mapping_phys_lookup)        // see if page is really there
        mr.     r3,r3                           // is it?
        beq--   copypv_done                     // nope, break out of reference loop
        mr      r3,r27                          // r3 <- source page number
        bl      EXT(mapping_set_ref)            // set page referenced status
        subi    r26,r26,1                       // decrement page count
        cmpwi   r26,0                           // done yet?
        bgt     copypv_refloop                  // nope, iterate

// Return, indicating success.
copypv_done:
copypv_zero:
        li      r3,0                            // our efforts were crowned with success

// Pop frame, restore caller's non-volatiles, clear recovery routine pointer.
copypv_return:
        mfsprg  r9,1                            // get current thread's stuff
        lwz     r0,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_LR_SAVE)(r1)
                                                // get return address
        lwz     r4,(FM_ALIGN((31-26+11)*4)+FM_SIZE+FM_CR_SAVE)(r1)
                                                // get non-volatile cr2 and cr3
        lwz     r26,FM_ARG0+0x00(r1)            // restore non-volatile r26
        lwz     r27,FM_ARG0+0x04(r1)            // restore non-volatile r27
        mtlr    r0                              // restore return address
        lwz     r28,FM_ARG0+0x08(r1)            // restore non-volatile r28
        mtcrf   0x20,r4                         // restore non-volatile cr2
        mtcrf   0x10,r4                         // restore non-volatile cr3
        lwz     r11,FM_ARG0+0x20(r1)            // get saved error callback
        lwz     r29,FM_ARG0+0x0C(r1)            // restore non-volatile r29
        lwz     r30,FM_ARG0+0x10(r1)            // restore non-volatile r30
        lwz     r31,FM_ARG0+0x14(r1)            // restore non-volatile r31
        stw     r11,THREAD_RECOVER(r9)          // restore caller's error callback
        lwz     r1,0(r1)                        // release stack frame

        blr                                     // y'all come back now

// Invalid argument handler.
copypv_einval:
        li      r3,EINVAL                       // invalid argument
        b       copypv_return                   // return

// Error encountered during bcopy or bcopy_nc.
copypv_error:
        mfmsr   r3                              // get current msr
        rldicl  r3,r3,0,MSR_SF_BIT+1            // clear SF bit in our copy
        mtmsrd  r3                              // leave 64-bit mode
        li      r3,EFAULT                       // it was all his fault
        b       copypv_return                   // return