/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* =======================================
 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
 * =======================================
 *
 * Version of 2/20/2003, tuned for G3.
 *
 * Register usage.  Note we use R2, so this code will not run in a PEF/CFM
 * environment.
 *
 *   r0  = "w7" or temp
 *   r2  = "w8"
 *   r3  = not used, as memcpy and memmove return 1st parameter as a value
 *   r4  = source ptr ("rs")
 *   r5  = count of bytes to move ("rc")
 *   r6  = "w1"
 *   r7  = "w2"
 *   r8  = "w3"
 *   r9  = "w4"
 *   r10 = "w5"
 *   r11 = "w6"
 *   r12 = destination ptr ("rd")
 * f0-f3 = used for moving 8-byte aligned data
 */
#define rs      r4              // NB: we depend on rs==r4 in "lswx" instructions
#define rd      r12
#define rc      r5

#define w1      r6
#define w2      r7
#define w3      r8
#define w4      r9
#define w5      r10
#define w6      r11
#define w7      r0
#define w8      r2

#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

        .text


// Operands of kLong bytes or more take the chunked "long" paths; anything
// shorter (0..32 bytes) fits in r5-r12 and is moved with one lswx/stswx pair.
#define kLong           33                      // too long for string ops


// Main entry points.

// bcopy_g3: void bcopy(const void *src, void *dst, size_t len)
//   In:  r3 = src, r4 = dst, r5 = len
//   Short operands are copied here; long ones branch to LLong0 with
//   w1 = (dst - src) and rd = dst already set up.
//   The short path loads every byte before storing any, so overlapping
//   operands are handled without a direction check.

        .align  5
bcopy_g3:                                       // void bcopy(const void *src, void *dst, size_t len)
        cmplwi  rc,kLong                        // length > 32 bytes?
        sub     w1,r4,r3                        // must move in reverse if (rd-rs)<rc
        mr      rd,r4                           // start to move source & dest to canonic spot
        bge     LLong0                          // skip if long operand
        mtxer   rc                              // set length for string ops
        lswx    r5,0,r3                         // load bytes into r5-r12 (overwrites rc/rd; count is in XER)
        stswx   r5,0,r4                         // store them
        blr

// NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page.

// Lmemcpy_g3 / Lmemmove_g3:
//   void* memcpy(void *dst, void *src, size_t len)
//   void* memmove(void *dst, const void *src, size_t len)
//   In:  r3 = dst, r4 = src (already "rs"), r5 = len
//   Out: r3 = dst, left untouched as the return value
//   Long operands branch to LLong1 with w1 = (dst - src) and rd = dst.

        .align  5
Lmemcpy_g3:                                     // void* memcpy(void *dst, void *src, size_t len)
Lmemmove_g3:                                    // void* memmove(void *dst, const void *src, size_t len)
        cmplwi  rc,kLong                        // length > 32 bytes?
        sub     w1,r3,rs                        // must move in reverse if (rd-rs)<rc
        mr      rd,r3                           // must leave r3 alone, it is return value for memcpy etc
        bge     LLong1                          // longer than 32 bytes
        mtxer   rc                              // set length for string ops
        lswx    r5,0,r4                         // load bytes into r5-r12 (overwrites rc/rd; count is in XER)
        stswx   r5,0,r3                         // store them
        blr

// Long operands (more than 32 bytes.)
//      w1  = (rd-rs), used to check for alignment
//      rd  = destination ptr, rc = byte count (>= kLong)
//      rs  = source ptr (set below when entering from bcopy)

LLong0:                                         // enter from bcopy()
        mr      rs,r3                           // must leave r3 alone (it is return value for memcpy)
LLong1:                                         // enter from memcpy() and memmove()
        cmplw   cr1,w1,rc                       // set cr1 blt iff we must move reverse (unsigned compare)
        rlwinm  r0,w1,0,0x3                     // are operands relatively word-aligned?
        neg     w2,rd                           // prepare to align destination
        cmpwi   cr5,r0,0                        // set cr5 beq if relatively word aligned
        blt     cr1,LLongReverse                // handle reverse move
        andi.   w4,w2,3                         // w4 <- #bytes to word align destination (cr0 used by "beq 1f")
        beq     cr5,LLongFloat                  // relatively aligned so use FPRs
        sub     rc,rc,w4                        // adjust count for alignment
        srwi    r0,rc,5                         // get #chunks to xfer (can be 0 only if w4 != 0)
        rlwinm  rc,rc,0,0x1F                    // mask down to leftover bytes
        mtctr   r0                              // set up loop count
        beq     1f                              // dest already word aligned (cr0 from andi. above)

// Word align the destination.

        mtxer   w4                              // byte count to xer
        cmpwi   r0,0                            // any chunks to xfer?
        lswx    w1,0,rs                         // move w4 bytes to align dest
        add     rs,rs,w4
        stswx   w1,0,rd
        add     rd,rd,w4
        beq-    2f                              // pathologic case, no chunks to xfer

// Forward, unaligned loop.
// Each pass loads a full 32-byte chunk into w1-w8 before storing any of it.

1:
        lwz     w1,0(rs)
        lwz     w2,4(rs)
        lwz     w3,8(rs)
        lwz     w4,12(rs)
        lwz     w5,16(rs)
        lwz     w6,20(rs)
        lwz     w7,24(rs)
        lwz     w8,28(rs)
        addi    rs,rs,32
        stw     w1,0(rd)
        stw     w2,4(rd)
        stw     w3,8(rd)
        stw     w4,12(rd)
        stw     w5,16(rd)
        stw     w6,20(rd)
        stw     w7,24(rd)
        stw     w8,28(rd)
        addi    rd,rd,32
        bdnz    1b
2:                                              // rc = remaining bytes (0-31)
        mtxer   rc                              // set up count for string ops
        mr      r0,rd                           // move dest ptr out of the way (lswx may load into r12)
        lswx    r5,0,rs                         // load xer bytes into r5-r12 (rs==r4)
        stswx   r5,0,r0                         // store them
        blr



// Forward, aligned loop.  We use FPRs.
// Entered with w2 = -rd and cr5 beq (operands relatively word aligned).

LLongFloat:
        andi.   w4,w2,7                         // w4 <- #bytes to doubleword-align destination
        sub     rc,rc,w4                        // adjust count for alignment
        srwi    r0,rc,5                         // number of 32-byte chunks to xfer (can be 0 only if w4 != 0)
        rlwinm  rc,rc,0,0x1F                    // mask down to leftover bytes
        mtctr   r0                              // set up loop count
        beq     1f                              // dest already doubleword aligned (cr0 from andi. above)

// Doubleword align the destination.

        mtxer   w4                              // byte count to xer
        cmpwi   r0,0                            // any chunks to xfer?
        lswx    w1,0,rs                         // move w4 bytes to align dest
        add     rs,rs,w4
        stswx   w1,0,rd
        add     rd,rd,w4
        beq-    2f                              // pathologic case, no chunks to xfer
1:                                              // loop over 32-byte chunks
        lfd     f0,0(rs)
        lfd     f1,8(rs)
        lfd     f2,16(rs)
        lfd     f3,24(rs)
        addi    rs,rs,32
        stfd    f0,0(rd)
        stfd    f1,8(rd)
        stfd    f2,16(rd)
        stfd    f3,24(rd)
        addi    rd,rd,32
        bdnz    1b
2:                                              // rc = remaining bytes (0-31)
        mtxer   rc                              // set up count for string ops
        mr      r0,rd                           // move dest ptr out of the way (lswx may load into r12)
        lswx    r5,0,rs                         // load xer bytes into r5-r12 (rs==r4)
        stswx   r5,0,r0                         // store them
        blr


// Long, reverse moves.
//      cr5 = beq if relatively word aligned
// Taken when (rd-rs) < rc unsigned, i.e. the destination starts inside the
// source operand, so bytes must be moved from the high end downward.

LLongReverse:
        add     rd,rd,rc                        // point to end of operands + 1
        add     rs,rs,rc
        beq     cr5,LReverseFloat               // aligned operands so can use FPRs
        srwi    r0,rc,5                         // get chunk count (>=1, since rc >= kLong here)
        rlwinm  rc,rc,0,0x1F                    // mask down to leftover bytes
        mtctr   r0                              // set up loop count
        mtxer   rc                              // set up for trailing bytes (nothing in the loop alters XER)
1:
        lwz     w1,-4(rs)
        lwz     w2,-8(rs)
        lwz     w3,-12(rs)
        lwz     w4,-16(rs)
        stw     w1,-4(rd)
        lwz     w5,-20(rs)
        stw     w2,-8(rd)
        lwz     w6,-24(rs)
        stw     w3,-12(rd)
        lwz     w7,-28(rs)
        stw     w4,-16(rd)
        lwzu    w8,-32(rs)                      // last load of the chunk also steps rs down by 32
        stw     w5,-20(rd)
        stw     w6,-24(rd)
        stw     w7,-28(rd)
        stwu    w8,-32(rd)                      // last store of the chunk also steps rd down by 32
        bdnz    1b

        sub     r4,rs,rc                        // point to 1st (leftmost) leftover byte (0..31)
        sub     r0,rd,rc                        // move dest ptr out of way
        lswx    r5,0,r4                         // load xer bytes into r5-r12
        stswx   r5,0,r0                         // store them
        blr


// Long, reverse aligned moves.  We use FPRs.
// LReverseFloat: reverse (high-to-low) copy of relatively word-aligned
// operands using f0-f3.  Reached from LLongReverse, which has already
// advanced rs and rd to one byte past the end of each operand.
//      rc = byte count

LReverseFloat:
        andi.   w4,rd,7                         // w4 <- #bytes to doubleword-align destination
        sub     rc,rc,w4                        // adjust count for alignment
        srwi    r0,rc,5                         // number of 32-byte chunks to xfer
        rlwinm  rc,rc,0,0x1F                    // mask down to leftover bytes
        mtctr   r0                              // set up loop count
        beq     1f                              // dest already doubleword aligned (cr0 from andi. above)

// Doubleword align the destination.

        mtxer   w4                              // byte count to xer
        cmpwi   r0,0                            // any chunks to xfer?
        sub     rs,rs,w4                        // point to 1st bytes to xfer
        sub     rd,rd,w4
        lswx    w1,0,rs                         // move w4 bytes to align dest
        stswx   w1,0,rd
        beq-    2f                              // pathologic case, no chunks to xfer
1:                                              // loop over 32-byte chunks, highest addresses first
        lfd     f0,-8(rs)
        lfd     f1,-16(rs)
        lfd     f2,-24(rs)
        lfdu    f3,-32(rs)                      // last load of the chunk also steps rs down by 32
        stfd    f0,-8(rd)
        stfd    f1,-16(rd)
        stfd    f2,-24(rd)
        stfdu   f3,-32(rd)                      // last store of the chunk also steps rd down by 32
        bdnz    1b
2:                                              // rc = remaining bytes (0-31)
        mtxer   rc                              // set up count for string ops
        sub     r4,rs,rc                        // point to 1st (leftmost) leftover byte (0..31)
        sub     r0,rd,rc                        // move dest ptr out of way
        lswx    r5,0,r4                         // load xer bytes into r5-r12
        stswx   r5,0,r0                         // store them
        blr

// NOTE(review): the argument meanings of COMMPAGE_DESCRIPTOR are defined in
// <machine/commpage.h>, which is not visible here -- confirm before changing.
        COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,kCommPage32)