/*
 *  linux/arch/arm26/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * JMA 01/06/03 Commented out some shl0s; probably irrelevant to arm26
 *
 * NOTE(review): this file is a generic core — FN_ENTRY, load_regs,
 * load1b/load2b/load1l/load2l/load4l, byte(), and the pull/push shift
 * macros are NOT defined here; presumably they are supplied by the
 * file that includes this one (user/kernel copy variants) — TODO confirm.
 */

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 * r0 = src, r1 = dst, r2 = len, r3 = sum
 * Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */

/* Quick hack */
		/*
		 * Save callee-saved state.  r1 (dst) is pushed first so that
		 * .done can reload the ORIGINAL dst from [sp, #0] to decide
		 * whether the final checksum needs byte-rotating.  The
		 * {..., fp, ip, lr, pc} layout looks like an APCS-style frame
		 * store (arm26 convention) — confirm against load_regs.
		 */
		.macro	save_regs
		stmfd	sp!, {r1, r4 - r8, fp, ip, lr, pc}
		.endm
/* end Quick Hack */

src	.req	r0
dst	.req	r1
len	.req	r2
sum	.req	r3

/* len == 0: fold nothing, return the incoming sum unchanged. */
.zero:		mov	r0, sum
		load_regs	ea

		/*
		 * Align an unaligned destination pointer.  We know that
		 * we have >= 8 bytes here, so we don't need to check
		 * the length.  Note that the source pointer hasn't been
		 * aligned yet.  Called with blne; returns via pc <- lr.
		 */
.dst_unaligned:	tst	dst, #1
		beq	.dst_16bit

		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, lsl #byte(1)	@ update checksum
		strb	ip, [dst], #1
		tst	dst, #2			@ tst preserves C for the adcs chain
		moveq	pc, lr			@ dst is now 32bit aligned

.dst_16bit:	load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, lsl #byte(0)
		strb	r8, [dst], #1
		adcs	sum, sum, ip, lsl #byte(1)
		strb	ip, [dst], #1
		mov	pc, lr			@ dst is now 32bit aligned

		/*
		 * Handle 0 to 7 bytes, with any alignment of source and
		 * destination pointers.  Note that when we get here, C = 0
		 */
.less8:		teq	len, #0			@ check for zero count
		beq	.zero

		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		beq	.less8_aligned

		/* Align dst */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, lsl #byte(1)	@ update checksum
		strb	ip, [dst], #1
		tst	len, #6			@ any 2-byte chunks left?
		beq	.less8_byteonly

1:		load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8, lsl #byte(0)
		strb	r8, [dst], #1
		adcs	sum, sum, ip, lsl #byte(1)
		strb	ip, [dst], #1
.less8_aligned:	tst	len, #6
		bne	1b
.less8_byteonly:
		tst	len, #1			@ odd trailing byte?
		beq	.done
		load1b	r8
		adcs	sum, sum, r8, lsl #byte(0)	@ update checksum
		strb	r8, [dst], #1
		b	.done

FN_ENTRY
		mov	ip, sp
		save_regs
		sub	fp, ip, #4

		cmp	len, #8			@ Ensure that we have at least
		blo	.less8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.dst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

		tst	src, #3			@ Test source alignment
		bne	.src_not_aligned

		/* Routine for src & dst aligned */

		bics	ip, len, #15		@ ip = bytes to copy in 16-byte chunks
		beq	2f

1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16
		teq	ip, #0			@ teq preserves C between iterations
		bne	1b

2:		ands	ip, len, #12		@ 4..12 remaining word bytes
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	4f

3:		load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

		/*
		 * Trailing 1-3 bytes.
		 * NOTE(review): the ARM port loads byte 0 of r4 into r5
		 * (mov r5, r4, lsr #byte(0)) before the strb below and before
		 * falling into .exit; here that instruction is absent (see the
		 * "shl0s" changelog note in the header), so r5 appears stale
		 * when the first trailing byte is stored — verify against the
		 * arch/arm counterpart.
		 */
4:		ands	len, len, #3
		beq	.done
		load1l	r4
		tst	len, #2
		beq	.exit
		adcs	sum, sum, r4, push #16
		strb	r5, [dst], #1
		mov	r5, r4, lsr #byte(1)
		strb	r5, [dst], #1
		mov	r5, r4, lsr #byte(2)
.exit:		tst	len, #1
		strneb	r5, [dst], #1
		andne	r5, r5, #255
		adcnes	sum, sum, r5, lsl #byte(0)

		/*
		 * If the dst pointer was not 16-bit aligned, we
		 * need to rotate the checksum here to get around
		 * the inefficient byte manipulations in the
		 * architecture independent code.
		 */
.done:		adc	r0, sum, #0		@ fold final carry into result
		ldr	sum, [sp, #0]		@ dst (saved first by save_regs)
		tst	sum, #1			@ was original dst odd?
		movne	sum, r0, lsl #8		@ rotate checksum left by 8 ...
		orrne	r0, sum, r0, lsr #24	@ ... (16-bit ones-complement safe)
		load_regs	ea

		/*
		 * src is not word aligned: read whole words from the
		 * rounded-down src and splice neighbouring words together
		 * with the pull/push shift macros (byte-rotation amounts
		 * 8/16/24 chosen per src & 3).
		 */
.src_not_aligned:
		adc	sum, sum, #0		@ include C from dst alignment
		and	ip, src, #3		@ src misalignment: 1, 2 or 3
		bic	src, src, #3		@ word-align src for load1l/load4l
		load1l	r5			@ prime the splice pipeline
		cmp	ip, #2
		beq	.src2_aligned
		bhi	.src3_aligned

		/* src & 3 == 1 */
		mov	r4, r5, pull #8		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		mov	r6, r6, pull #8
		orr	r6, r6, r7, push #24
		mov	r7, r7, pull #8
		orr	r7, r7, r8, push #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #8		@ carry leftover bytes to next iter
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #24
		mov	r5, r5, pull #8
		orr	r5, r5, r6, push #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #8
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #8
		/* NOTE(review): same missing byte(0)->r5 move as the aligned tail. */
4:		ands	len, len, #3
		beq	.done
		tst	len, #2
		beq	.exit
		adcs	sum, sum, r4, push #16
		strb	r5, [dst], #1
		mov	r5, r4, lsr #byte(1)
		strb	r5, [dst], #1
		mov	r5, r4, lsr #byte(2)
		b	.exit

		/* src & 3 == 2: splice with 16-bit rotation */
.src2_aligned:	mov	r4, r5, pull #16
		adds	sum, sum, #0		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		mov	r6, r6, pull #16
		orr	r6, r6, r7, push #16
		mov	r7, r7, pull #16
		orr	r7, r7, r8, push #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #16
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #16
		mov	r5, r5, pull #16
		orr	r5, r5, r6, push #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #16
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #16
		/* NOTE(review): same missing byte(0)->r5 move as the aligned tail. */
4:		ands	len, len, #3
		beq	.done
		tst	len, #2
		beq	.exit
		adcs	sum, sum, r4
		strb	r5, [dst], #1
		mov	r5, r4, lsr #byte(1)
		strb	r5, [dst], #1
		tst	len, #1
		beq	.done
		load1b	r5			@ last odd byte straddles a word
		b	.exit

		/* src & 3 == 3: splice with 24-bit rotation */
.src3_aligned:	mov	r4, r5, pull #24
		adds	sum, sum, #0		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		mov	r6, r6, pull #24
		orr	r6, r6, r7, push #8
		mov	r7, r7, pull #24
		orr	r7, r7, r8, push #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, pull #24
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, push #8
		mov	r5, r5, pull #24
		orr	r5, r5, r6, push #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, pull #24
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, push #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, pull #24
		/*
		 * NOTE(review): both strb's below store r5 without a preceding
		 * mov r5, r4, lsr #byte(0); the ARM port has those moves —
		 * likely the "shl0s" removed per the header note. Verify.
		 */
4:		ands	len, len, #3
		beq	.done
		tst	len, #2
		beq	.exit
		strb	r5, [dst], #1
		adcs	sum, sum, r4
		load1l	r4
		strb	r5, [dst], #1
		adcs	sum, sum, r4, push #24
		mov	r5, r4, lsr #byte(1)
		b	.exit