/*	$NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $	*/

/*-
 * Copyright (c) 2001 Ben Harris.
 * Copyright (c) 1994 Mark Brinicombe.
 * Copyright (c) 1994 Brini.
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of the company nor the name of the author may be used to
 *    endorse or promote products derived from this software without specific
 *    prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * RiscBSD kernel project
 *
 * blockio.S
 *
 * optimised block read/write from/to IO routines.
 *
 * Created      : 08/10/94
 * Modified	: 22/01/99  -- R.Earnshaw
 *		  Faster, and small tweaks for StrongARM
 */

#include <machine/asm.h>

__FBSDID("$FreeBSD$");

/*
 * Conventions shared by every routine in this file:
 *
 *   r0 = I/O address.  It is never advanced; every device access goes to
 *        r0 itself (or, for the *m8 routines, to the 8 words starting at r0).
 *   r1 = memory buffer address, advanced as data is transferred.
 *   r2 = element count (bytes for *_multi_1, 16-bit items for the *sw*
 *        routines).
 *
 * r3 and ip (r12) are used as scratch.  Routines that need more scratch
 * registers save and restore them on the stack.  The order of the device
 * accesses is part of each routine's behaviour and must not be changed.
 */

/*
 * Read bytes from an I/O address into a block of memory
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (in bytes)
 *
 * Shape: store 1-3 single bytes until r1 is word aligned, then assemble
 * four byte reads into one word per store (first byte read lands in bits
 * 0-7), then store the remaining 0-3 bytes one at a time.
 */

/* This code will look very familiar if you've read _memcpy(). */
ENTRY(read_multi_1)
	/*
	 * Frame setup: push {fp, ip(=caller sp), lr, pc} and point fp at
	 * the highest pushed word, so that "ldmdb fp, {fp, sp, pc}" below
	 * restores fp and sp and returns through the saved lr.
	 */
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	subs	r2, r2, #4		/* r2 = length - 4 */
	blt	.Lrm1_l4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lrm1_main		/* aligned destination */
	rsb	r12, r12, #4		/* r12 = bytes needed to align (1-3) */
	cmp	r12, #2			/* lt: 1 byte, eq: 2, gt: 3 */
	ldrb	r3, [r0]
	strb	r3, [r1], #1
	ldrgeb	r3, [r0]
	strgeb	r3, [r1], #1
	ldrgtb	r3, [r0]
	strgtb	r3, [r1], #1
	subs	r2, r2, r12		/* account for the alignment bytes */
	blt	.Lrm1_l4		/* fewer than 4 bytes left */
.Lrm1_main:
.Lrm1loop:
	ldrb	r3, [r0]
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #8
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #16
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #24
	str	r3, [r1], #4
	subs	r2, r2, #4
	bge	.Lrm1loop
.Lrm1_l4:
	adds	r2, r2, #4		/* r2 = length again */
	ldmeqdb	fp, {fp, sp, pc}	/* nothing left: unwind and return */
	cmp	r2, #2			/* lt: 1 byte, eq: 2, gt: 3 */
	ldrb	r3, [r0]
	strb	r3, [r1], #1
	ldrgeb	r3, [r0]
	strgeb	r3, [r1], #1
	ldrgtb	r3, [r0]
	strgtb	r3, [r1], #1
	ldmdb	fp, {fp, sp, pc}
END(read_multi_1)

/*
 * Write bytes to an I/O address from a block of memory
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (in bytes)
 *
 * Shape: write 1-3 single bytes until r1 is word aligned, then load one
 * word per iteration and write its four bytes lowest byte first, then
 * write the remaining 0-3 bytes one at a time.
 */

/* This code will look very familiar if you've read _memcpy(). */
ENTRY(write_multi_1)
	/* Same frame layout as read_multi_1; undone by "ldmdb fp, ...". */
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	subs	r2, r2, #4		/* r2 = length - 4 */
	blt	.Lwm1_l4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lwm1_main		/* aligned source */
	rsb	r12, r12, #4		/* r12 = bytes needed to align (1-3) */
	cmp	r12, #2			/* lt: 1 byte, eq: 2, gt: 3 */
	ldrb	r3, [r1], #1
	strb	r3, [r0]
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0]
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0]
	subs	r2, r2, r12		/* account for the alignment bytes */
	blt	.Lwm1_l4		/* fewer than 4 bytes left */
.Lwm1_main:
.Lwm1loop:
	ldr	r3, [r1], #4
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	subs	r2, r2, #4
	bge	.Lwm1loop
.Lwm1_l4:
	adds	r2, r2, #4		/* r2 = length again */
	ldmeqdb	fp, {fp, sp, pc}	/* nothing left: unwind and return */
	cmp	r2, #2			/* lt: 1 byte, eq: 2, gt: 3 */
	ldrb	r3, [r1], #1
	strb	r3, [r0]
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0]
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0]
	ldmdb	fp, {fp, sp, pc}
END(write_multi_1)

/*
 * Reads short ints (16 bits) from an I/O address into a block of memory
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (number of 16-bit items)
 *
 * Returns immediately when r2 <= 0 (signed compare).
 *
 * NOTE(review): the fast path issues a word load from r0 + 2, which is not
 * word aligned when r0 is, and keeps only the top 16 bits of the result.
 * What such an access returns is platform behaviour that is not visible in
 * this file -- confirm before reusing this on a different core or bus.
 */

ENTRY(insw)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the destination address is word aligned and the count is even, do it fast */

	tst	r2, #0x00000001
	tsteq	r1, #0x00000003
	beq	.Lfastinsw

/* Non aligned insw: one I/O read and two byte stores per item */

.Linswloop:
	ldr	r3, [r0]
	subs	r2, r2, #0x00000001	/* Loop test in load delay slot */
	strb	r3, [r1], #0x0001	/* strb/mov leave the flags alone */
	mov	r3, r3, lsr #8
	strb	r3, [r1], #0x0001
	bgt	.Linswloop

	RET

/* Word aligned insw: two I/O reads and one word store per iteration */

.Lfastinsw:

.Lfastinswloop:
	ldr	r3, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	ip, [r0]
	mov	r3, r3, lsr #16		/* Put the two shorts together */
	orr	r3, r3, ip, lsl #16
	str	r3, [r1], #0x0004	/* Store */
	subs	r2, r2, #0x00000002	/* Next */
	bgt	.Lfastinswloop

	RET
END(insw)

/*
 * Writes short ints (16 bits) from a block of memory to an I/O address
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (number of 16-bit items)
 *
 * Returns immediately when r2 <= 0 (signed compare).  Every word written
 * to the I/O address carries the 16-bit item in both its upper and its
 * lower half.
 */

ENTRY(outsw)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the source address is word aligned and the count is even, do it fast */

	tst	r2, #0x00000001
	tsteq	r1, #0x00000003
	beq	.Lfastoutsw

/* Non aligned outsw: two byte loads and one I/O write per item */

.Loutswloop:
	ldrb	r3, [r1], #0x0001
	ldrb	ip, [r1], #0x0001
	subs	r2, r2, #0x00000001	/* Loop test in load delay slot */
	orr	r3, r3, ip, lsl #8	/* r3 = 16-bit item, first byte low */
	orr	r3, r3, r3, lsl #16	/* duplicate it into the upper half */
	str	r3, [r0]
	bgt	.Loutswloop

	RET

/* Word aligned outsw: one word load and two I/O writes per iteration */

.Lfastoutsw:

.Lfastoutswloop:
	ldr	r3, [r1], #0x0004	/* r3 = (H)(L) */
	subs	r2, r2, #0x00000002	/* Loop test in load delay slot */

	/* The eor/str sequence below does not touch the flags set above. */
	eor	ip, r3, r3, lsr #16	/* ip = (H)(H^L) */
	eor	r3, r3, ip, lsl #16	/* r3 = (H^H^L)(L) = (L)(L) */
	eor	ip, ip, r3, lsr #16	/* ip = (H)(H^L^L) = (H)(H) */

	str	r3, [r0]		/* low item first ... */
	str	ip, [r0]		/* ... then the high item */

/*	mov	ip, r3, lsl #16
 *	orr	ip, ip, ip, lsr #16
 *	str	ip, [r0]
 *
 *	mov	ip, r3, lsr #16
 *	orr	ip, ip, ip, lsl #16
 *	str	ip, [r0]
 */

	bgt	.Lfastoutswloop

	RET
END(outsw)

/*
 * reads short ints (16 bits) from an I/O address into a block of memory
 * with a length guaranteed to be a multiple of 16 bytes
 * with a word aligned destination address
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (number of 16-bit items)
 *
 * Returns immediately when r2 <= 0.  If the count is not a multiple of 8
 * items or r1 is not word aligned, control is handed to insw instead.
 * Uses the same r0 / r0 + 2 load pairing as the fast path of insw.
 */

ENTRY(insw16)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the destination address is word aligned and the size suitably
	aligned, do it fast */

	tst	r2, #0x00000007
	tsteq	r1, #0x00000003

	bne	_C_LABEL(insw)		/* plain branch: insw returns to our caller */

/* Word aligned insw */

	stmfd	sp!, {r4,r5,lr}		/* lr is used as scratch below */

.Linsw16loop:
	ldr	r3, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	r3, r3, lsr #16		/* Put the two shorts together */
	orr	r3, r3, lr, lsl #16

	ldr	r4, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	r4, r4, lsr #16		/* Put the two shorts together */
	orr	r4, r4, lr, lsl #16

	ldr	r5, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	r5, r5, lsr #16		/* Put the two shorts together */
	orr	r5, r5, lr, lsl #16

	ldr	ip, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	ip, ip, lsr #16		/* Put the two shorts together */
	orr	ip, ip, lr, lsl #16

	stmia	r1!, {r3-r5,ip}		/* 4 words = 8 items per iteration */
	subs	r2, r2, #0x00000008	/* Next */
	bgt	.Linsw16loop

	ldmfd	sp!, {r4,r5,pc}		/* Restore regs and go home */
END(insw16)

/*
 * Writes short ints (16 bits) from a block of memory to an I/O address
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (number of 16-bit items)
 *
 * Returns immediately when r2 <= 0.  If the count is not a multiple of 8
 * items or r1 is not word aligned, control is handed to outsw instead.
 * Every word written to the I/O address carries the 16-bit item in both
 * halves, low item of each source word first.
 */

ENTRY(outsw16)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the source address is word aligned and the size suitably
	aligned, do it fast */

	tst	r2, #0x00000007
	tsteq	r1, #0x00000003

	bne	_C_LABEL(outsw)		/* plain branch: outsw returns to our caller */

/* Word aligned outsw */

	stmfd	sp!, {r4,r5,lr}		/* lr is used as scratch below */

.Loutsw16loop:
	ldmia	r1!, {r4,r5,ip,lr}	/* 4 words = 8 items, each (A)(B) */

	eor	r3, r4, r4, lsl #16	/* r3 = (A^B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, r4, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	r4, [r0]

/*	mov	r3, r4, lsl #16
 *	orr	r3, r3, r3, lsr #16
 *	str	r3, [r0]
 *
 *	mov	r3, r4, lsr #16
 *	orr	r3, r3, r3, lsl #16
 *	str	r3, [r0]
 */

	eor	r3, r5, r5, lsl #16	/* r3 = (A^B)(B) */
	eor	r5, r5, r3, lsr #16	/* r5 = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, r5, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	r5, [r0]

	eor	r3, ip, ip, lsl #16	/* r3 = (A^B)(B) */
	eor	ip, ip, r3, lsr #16	/* ip = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, ip, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	ip, [r0]

	eor	r3, lr, lr, lsl #16	/* r3 = (A^B)(B) */
	eor	lr, lr, r3, lsr #16	/* lr = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, lr, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	lr, [r0]

	subs	r2, r2, #0x00000008
	bgt	.Loutsw16loop

	ldmfd	sp!, {r4,r5,pc}		/* and go home */
END(outsw16)

/*
 * reads short ints (16 bits) from an I/O address into a block of memory
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words.
 * The destination address should be word aligned.
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (number of 16-bit items)
 *
 * Returns immediately when r2 <= 0.  If r1 is not word aligned, control
 * is handed to insw instead.  Otherwise items are moved in groups of 8
 * while at least 8 remain, then at most one group each of 4, 2 and 1.
 * Only the low 16 bits of each word read from the device are kept.
 */

ENTRY(inswm8)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the destination address is word aligned, do it fast */

	tst	r1, #0x00000003

	bne	_C_LABEL(insw)		/* plain branch: insw returns to our caller */

/* Word aligned insw */

	stmfd	sp!, {r4-r9,lr}

	mov	lr, #0xff000000
	orr	lr, lr, #0x00ff0000	/* lr = 0xffff0000: upper-half mask */

.Linswm8_loop8:
	cmp	r2, #8
	bcc	.Linswm8_l8		/* fewer than 8 items left */

	ldmia	r0, {r3-r9,ip}		/* 8 device words, one item each */

	bic	r3, r3, lr		/* keep low half of the even word ... */
	orr	r3, r3, r4, lsl #16	/* ... odd word supplies the high half */
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16
	bic	r7, r7, lr
	orr	r5, r7, r8, lsl #16
	bic	r9, r9, lr
	orr	r6, r9, ip, lsl #16

	stmia	r1!, {r3-r6}

	subs	r2, r2, #0x00000008	/* Next */
	bne	.Linswm8_loop8
	beq	.Linswm8_l1

.Linswm8_l8:
	cmp	r2, #4
	bcc	.Linswm8_l4		/* fewer than 4 items left */

	ldmia	r0, {r3-r6}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16

	stmia	r1!, {r3-r4}

	subs	r2, r2, #0x00000004
	beq	.Linswm8_l1

.Linswm8_l4:
	cmp	r2, #2
	bcc	.Linswm8_l2		/* fewer than 2 items left */

	ldmia	r0, {r3-r4}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	str	r3, [r1], #0x0004

	subs	r2, r2, #0x00000002
	beq	.Linswm8_l1

.Linswm8_l2:
	cmp	r2, #1
	bcc	.Linswm8_l1		/* nothing left */

	ldr	r3, [r0]
	subs	r2, r2, #0x00000001	/* Test in load delay slot */
					/* XXX, why don't we use result?  */

	strb	r3, [r1], #0x0001	/* last item stored as two bytes */
	mov	r3, r3, lsr #8
	strb	r3, [r1], #0x0001


.Linswm8_l1:
	ldmfd	sp!, {r4-r9,pc}		/* And go home */
END(inswm8)

/*
 * write short ints (16 bits) to an I/O address from a block of memory
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words.
 * The source address should be word aligned.
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (number of 16-bit items)
 *
 * Returns immediately when r2 <= 0.  If r1 is not word aligned, control
 * is handed to outsw instead.  Otherwise items are moved in groups of 8
 * while at least 8 remain, then at most one group each of 4, 2 and 1.
 * Every word written to the device carries the 16-bit item in both halves;
 * the low item of each source word goes to the lower device address.
 */

ENTRY(outswm8)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the source address is word aligned, do it fast */

	tst	r1, #0x00000003

	bne	_C_LABEL(outsw)		/* plain branch: outsw returns to our caller */

/* Word aligned outsw */

	stmfd	sp!, {r4-r8,lr}		/* lr is used as scratch below */

.Loutswm8_loop8:
	cmp	r2, #8
	bcc	.Loutswm8_l8		/* fewer than 8 items left */

	ldmia	r1!, {r3,r5,r7,ip}	/* 4 words = 8 items, each (A)(B) */

	eor	r4, r3, r3, lsr #16	/* r4 = (A)(A^B) */
	eor	r3, r3, r4, lsl #16	/* r3 = (A^A^B)(B) = (B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(B^A^B) = (A)(A) */

	eor	r6, r5, r5, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r5, r6, lsl #16	/* r5 = (A^A^B)(B) = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(B^A^B) = (A)(A) */

	eor	r8, r7, r7, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r7, r8, lsl #16	/* r7 = (A^A^B)(B) = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(B^A^B) = (A)(A) */

	eor	lr, ip, ip, lsr #16	/* lr = (A)(A^B) */
	eor	ip, ip, lr, lsl #16	/* ip = (A^A^B)(B) = (B)(B) */
	eor	lr, lr, ip, lsr #16	/* lr = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r3-r8,ip,lr}	/* 8 device words, one item each */

	subs	r2, r2, #0x00000008	/* Next */
	bne	.Loutswm8_loop8
	beq	.Loutswm8_l1

.Loutswm8_l8:
	cmp	r2, #4
	bcc	.Loutswm8_l4		/* fewer than 4 items left */

	ldmia	r1!, {r3-r4}

	eor	r6, r3, r3, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r3, r6, lsl #16	/* r5 = (A^A^B)(B) = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(B^A^B) = (A)(A) */

	eor	r8, r4, r4, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r4, r8, lsl #16	/* r7 = (A^A^B)(B) = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r5-r8}

	subs	r2, r2, #0x00000004
	beq	.Loutswm8_l1

.Loutswm8_l4:
	cmp	r2, #2
	bcc	.Loutswm8_l2		/* fewer than 2 items left */

	ldr	r3, [r1], #0x0004	/* r3 = (A)(B) */
	subs	r2, r2, #0x00000002	/* Done test in Load delay slot */

	/* eor and stmia leave the flags from the subs above intact. */
	eor	r5, r3, r3, lsr #16	/* r5 = (A)(A^B)*/
	eor	r4, r3, r5, lsl #16	/* r4 = (A^A^B)(B) = (B)(B) */
	eor	r5, r5, r4, lsr #16	/* r5 = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r4, r5}

	beq	.Loutswm8_l1

.Loutswm8_l2:
	cmp	r2, #1
	bcc	.Loutswm8_l1		/* nothing left */

	ldrb	r3, [r1], #0x0001
	ldrb	r4, [r1], #0x0001
	subs	r2, r2, #0x00000001	/* Done test in load delay slot */
					/* XXX This test isn't used?  */
	orr	r3, r3, r4, lsl #8	/* r3 = 16-bit item, first byte low */
	orr	r3, r3, r3, lsl #16	/* duplicate it into the upper half */
	str	r3, [r0]

.Loutswm8_l1:
	ldmfd	sp!, {r4-r8,pc}		/* And go home */
END(outswm8)