@
@ ARMv4 optimized DSP utils
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
@ This file is part of Libav.
@
@ Libav is free software; you can redistribute it and/or
@ modify it under the terms of the GNU Lesser General Public
@ License as published by the Free Software Foundation; either
@ version 2.1 of the License, or (at your option) any later version.
@
@ Libav is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
@ Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public
@ License along with Libav; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@

#include "config.h"
#include "asm.S"

        @ ARM EABI build attribute: the stack is kept 8-byte aligned.
        preserve8

#if HAVE_ARMV5TE
@ Prefetch r2 lines of memory: issues one PLD per iteration, stepping
@ r0 by r1 bytes each time (r0 = address, r1 = stride, r2 = count).
@ PLD is an ARMv5TE instruction, hence the guard.
function ff_prefetch_arm, export=1
        subs            r2,  r2,  #1            @ one line done
        pld             [r0]                    @ hint: preload [r0]
        add             r0,  r0,  r1            @ advance to next line
        bne             ff_prefetch_arm         @ loop until r2 == 0
        bx              lr
endfunc
#else
@ No PLD on pre-ARMv5TE cores: turn every "pld ..." line in this file
@ into a comment ('@' starts a comment in ARM gas), so it assembles away.
#define pld @
#endif

@ Extract a 16-byte (quad-word) run that starts \shift bytes into the
@ five consecutive source words \Rn0..\Rn4, writing it to \Rd0..\Rd3.
@ Little-endian byte order: low bytes come from the lsr of the current
@ word, high bytes from the lsl of the following word.
@ Source registers are only read; \shift must be 1..3.
.macro  ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        mov             \Rd0, \Rn0, lsr #(\shift * 8)
        mov             \Rd1, \Rn1, lsr #(\shift * 8)
        mov             \Rd2, \Rn2, lsr #(\shift * 8)
        mov             \Rd3, \Rn3, lsr #(\shift * 8)
        orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
        orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
        orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
        orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm

@ In-place variant for an 8-byte run: realign \R0,\R1 by \shift bytes,
@ pulling the spill-over bytes from \R2.  \R0/\R1 are updated, \R2 is
@ only read.  Each orr reads the *original* value of the next register
@ because the corresponding mov of that register happens afterwards
@ (\R0's orr uses \R1 before \R1 is shifted).
.macro  ALIGN_DWORD shift, R0, R1, R2
        mov             \R0, \R0, lsr #(\shift * 8)
        orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)
        mov             \R1, \R1, lsr #(\shift * 8)
        orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm

@ Non-destructive 8-byte realign: like ALIGN_DWORD but the result goes
@ to separate destination registers \Rdst0,\Rdst1 and the three source
@ words \Rsrc0..\Rsrc2 are preserved (unless a caller deliberately
@ overlaps them, e.g. RND_XY2_IT's align==3 case).
.macro  ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)
        mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
        orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
        orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm
@ Packed byte-wise rounded average of two 8-byte blocks:
@   Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
@ i.e. per byte: (a + b + 1) >> 1, without byte-lane carry leaks.
@ Rmask must hold 0xFEFEFEFE.  Rn0/Rn1 are clobbered.
.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor             \Rd0, \Rn0, \Rm0
        eor             \Rd1, \Rn1, \Rm1
        orr             \Rn0, \Rn0, \Rm0
        orr             \Rn1, \Rn1, \Rm1
        and             \Rd0, \Rd0, \Rmask
        and             \Rd1, \Rd1, \Rmask
        sub             \Rd0, \Rn0, \Rd0, lsr #1
        sub             \Rd1, \Rn1, \Rd1, lsr #1
.endm

@ Packed byte-wise truncating (no-round) average:
@   Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
@ i.e. per byte: (a + b) >> 1.
@ Rmask must hold 0xFEFEFEFE.  Rn0/Rn1 are clobbered.
.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor             \Rd0, \Rn0, \Rm0
        eor             \Rd1, \Rn1, \Rm1
        and             \Rn0, \Rn0, \Rm0
        and             \Rn1, \Rn1, \Rm1
        and             \Rd0, \Rd0, \Rmask
        and             \Rd1, \Rd1, \Rmask
        add             \Rd0, \Rn0, \Rd0, lsr #1
        add             \Rd1, \Rn1, \Rd1, lsr #1
.endm

@ Dispatch on the byte alignment of \reg: word-align \reg (clear its
@ low two bits) and branch to local label 1f/2f/3f/4f for alignment
@ 0/1/2/3 respectively.  Those numeric labels are defined inside each
@ function that invokes this macro.  \tmp is clobbered.
.macro  JMP_ALIGN tmp, reg
        ands            \tmp, \reg, #3
        bic             \reg, \reg, #3
        beq             1f
        subs            \tmp, \tmp, #1
        beq             2f
        subs            \tmp, \tmp, #1
        beq             3f
        b               4f
.endm

@ ----------------------------------------------------------------
        .align 5
function ff_put_pixels16_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Copies a 16xh block; one branch-free inner loop per source
        @ alignment.  The ldm over-reads one extra word (r8) in the
        @ unaligned cases to feed ALIGN_QWORD_D.
        pld             [r1]
        push            {r4-r11, lr}
        JMP_ALIGN       r5,  r1
1:                                              @ alignment 0: straight copy
        ldm             r1,  {r4-r7}
        add             r1,  r1,  r2
        stm             r0,  {r4-r7}
        pld             [r1]
        subs            r3,  r3,  #1
        add             r0,  r0,  r2
        bne             1b
        pop             {r4-r11, pc}
        .align 5
2:                                              @ alignment 1
        ldm             r1,  {r4-r8}
        add             r1,  r1,  r2
        ALIGN_QWORD_D   1,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
        pld             [r1]
        subs            r3,  r3,  #1
        stm             r0,  {r9-r12}
        add             r0,  r0,  r2
        bne             2b
        pop             {r4-r11, pc}
        .align 5
3:                                              @ alignment 2
        ldm             r1,  {r4-r8}
        add             r1,  r1,  r2
        ALIGN_QWORD_D   2,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
        pld             [r1]
        subs            r3,  r3,  #1
        stm             r0,  {r9-r12}
        add             r0,  r0,  r2
        bne             3b
        pop             {r4-r11, pc}
        .align 5
4:                                              @ alignment 3
        ldm             r1,  {r4-r8}
        add             r1,  r1,  r2
        ALIGN_QWORD_D   3,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
        pld             [r1]
        subs            r3,  r3,  #1
        stm             r0,  {r9-r12}
        add             r0,  r0,  r2
        bne             4b
        pop             {r4-r11, pc}
endfunc

@ ----------------------------------------------------------------
        .align 5
function ff_put_pixels8_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ 8xh copy; same per-alignment dispatch as the 16-wide version.
        pld             [r1]
        push            {r4-r5, lr}
        JMP_ALIGN       r5,  r1
1:                                              @ alignment 0
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
        subs            r3,  r3,  #1
        pld             [r1]
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             1b
        pop             {r4-r5, pc}
        .align 5
2:                                              @ alignment 1
        ldm             r1,  {r4-r5, r12}
        add             r1,  r1,  r2
        ALIGN_DWORD     1,   r4,  r5,  r12
        pld             [r1]
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             2b
        pop             {r4-r5, pc}
        .align 5
3:                                              @ alignment 2
        ldm             r1,  {r4-r5, r12}
        add             r1,  r1,  r2
        ALIGN_DWORD     2,   r4,  r5,  r12
        pld             [r1]
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             3b
        pop             {r4-r5, pc}
        .align 5
4:                                              @ alignment 3
        ldm             r1,  {r4-r5, r12}
        add             r1,  r1,  r2
        ALIGN_DWORD     3,   r4,  r5,  r12
        pld             [r1]
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             4b
        pop             {r4-r5, pc}
endfunc

@ ----------------------------------------------------------------
        .align 5
function ff_put_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Half-pel horizontal interpolation: each output byte is the
        @ rounded average of pixels[i] and pixels[i+1].  The two inputs
        @ to RND_AVG32 are the same row realigned at n and n+1 bytes.
        pld             [r1]
        push            {r4-r10, lr}
        ldr             r12, =0xfefefefe        @ mask for RND_AVG32
        JMP_ALIGN       r5,  r1
1:                                              @ alignment 0: row vs row+1
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
        pld             [r1]
        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             1b
        pop             {r4-r10, pc}
        .align 5
2:                                              @ alignment 1: offsets 1 and 2
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
        ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
        pld             [r1]
        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             2b
        pop             {r4-r10, pc}
        .align 5
3:                                              @ alignment 2: offsets 2 and 3
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
        ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
        pld             [r1]
        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             3b
        pop             {r4-r10, pc}
        .align 5
4:                                              @ alignment 3: offset 3 vs whole words (offset 4)
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
        pld             [r1]
        RND_AVG32       r8,  r9,  r6,  r7,  r5,  r10, r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             4b
        pop             {r4-r10, pc}
endfunc

        .align 5
function ff_put_no_rnd_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Same structure as ff_put_pixels8_x2_arm, but with the
        @ truncating NO_RND_AVG32 instead of the rounding average.
        pld             [r1]
        push            {r4-r10, lr}
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5,  r1
1:                                              @ alignment 0
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
        pld             [r1]
        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             1b
        pop             {r4-r10, pc}
        .align 5
2:                                              @ alignment 1
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
        ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
        pld             [r1]
        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             2b
        pop             {r4-r10, pc}
        .align 5
3:                                              @ alignment 2
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
        ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
        pld             [r1]
        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             3b
        pop             {r4-r10, pc}
        .align 5
4:                                              @ alignment 3
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
        pld             [r1]
        NO_RND_AVG32    r8,  r9,  r6,  r7,  r5,  r10, r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             4b
        pop             {r4-r10, pc}
endfunc


@ ----------------------------------------------------------------
        .align 5
function ff_put_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Half-pel vertical interpolation: each output row is the
        @ rounded average of two adjacent source rows.  The loop is
        @ unrolled x2 (r3 = h/2) so each loaded row is reused as the
        @ second operand of one average and the first of the next.
        pld             [r1]
        push            {r4-r11, lr}
        mov             r3,  r3,  lsr #1        @ two output rows per iteration
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5,  r1
1:                                              @ alignment 0
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
6:      ldm             r1,  {r6-r7}
        add             r1,  r1,  r2
        pld             [r1]
        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        pld             [r1]
        RND_AVG32       r8,  r9,  r6,  r7,  r4,  r5,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11, pc}
        .align 5
2:                                              @ alignment 1
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     1,   r4,  r5,  r6
6:      ldm             r1,  {r7-r9}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     1,   r7,  r8,  r9
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     1,   r4,  r5,  r6
        subs            r3,  r3,  #1
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11, pc}
        .align 5
3:                                              @ alignment 2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     2,   r4,  r5,  r6
6:      ldm             r1,  {r7-r9}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     2,   r7,  r8,  r9
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     2,   r4,  r5,  r6
        subs            r3,  r3,  #1
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11, pc}
        .align 5
4:                                              @ alignment 3
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     3,   r4,  r5,  r6
6:      ldm             r1,  {r7-r9}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     3,   r7,  r8,  r9
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     3,   r4,  r5,  r6
        subs            r3,  r3,  #1
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11, pc}
endfunc

        .align 5
function ff_put_no_rnd_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Same structure as ff_put_pixels8_y2_arm with NO_RND_AVG32.
        pld             [r1]
        push            {r4-r11, lr}
        mov             r3,  r3,  lsr #1        @ two output rows per iteration
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5,  r1
1:                                              @ alignment 0
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
6:      ldm             r1,  {r6-r7}
        add             r1,  r1,  r2
        pld             [r1]
        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        pld             [r1]
        NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11, pc}
        .align 5
2:                                              @ alignment 1
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     1,   r4,  r5,  r6
6:      ldm             r1,  {r7-r9}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     1,   r7,  r8,  r9
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     1,   r4,  r5,  r6
        subs            r3,  r3,  #1
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11, pc}
        .align 5
3:                                              @ alignment 2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     2,   r4,  r5,  r6
6:      ldm             r1,  {r7-r9}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     2,   r7,  r8,  r9
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     2,   r4,  r5,  r6
        subs            r3,  r3,  #1
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11, pc}
        .align 5
4:                                              @ alignment 3
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     3,   r4,  r5,  r6
6:      ldm             r1,  {r7-r9}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     3,   r7,  r8,  r9
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     3,   r4,  r5,  r6
        subs            r3,  r3,  #1
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11, pc}
endfunc

        @ Dump the literal pool here (the ldr rX, =const constants above)
        @ so they stay within ldr's pc-relative addressing range.
        .ltorg

@ ----------------------------------------------------------------
@ One row-pair step of the 2D (x+y) half-pel interpolation.  For one
@ source row (realigned to offsets n and n+1 in r4-r5 / r6-r7) it
@ computes, per 32-bit word:
@   l1 = (a & 0x03030303) + (b & 0x03030303) [+ bias]   -> r8, r9
@   h1 = ((a & 0xFCFCFCFC) >> 2) + ((b & 0xFCFCFCFC) >> 2) -> r10, r11
@ \rnd selects the bias via "0x03030303 & (0x03030303 \rnd #1)":
@ lsl gives 0x02020202 (rounding) and lsr gives 0x01010101 (no-round).
@ The bias is only applied when the remaining row count r3 is even
@ (tst r3, #1 / *eq); since r3 is decremented between the two halves
@ of each output row, exactly one half per output row gets the bias.
@ Also decrements r3 and sets flags for the caller's loop.
.macro  RND_XY2_IT align, rnd
.if \align == 0
        ldm             r1,  {r6-r8}
.elseif \align == 3
        ldm             r1,  {r5-r7}
.else
        ldm             r1,  {r8-r10}
.endif
        add             r1,  r1,  r2
        pld             [r1]
.if \align == 0
        ALIGN_DWORD_D   1,   r4,  r5,  r6,  r7,  r8
.elseif \align == 1
        ALIGN_DWORD_D   1,   r4,  r5,  r8,  r9,  r10
        ALIGN_DWORD_D   2,   r6,  r7,  r8,  r9,  r10
.elseif \align == 2
        ALIGN_DWORD_D   2,   r4,  r5,  r8,  r9,  r10
        ALIGN_DWORD_D   3,   r6,  r7,  r8,  r9,  r10
.elseif \align == 3
        ALIGN_DWORD_D   3,   r4,  r5,  r5,  r6,  r7
.endif
        ldr             r14, =0x03030303
        tst             r3,  #1
        and             r8,  r4,  r14
        and             r9,  r5,  r14
        and             r10, r6,  r14
        and             r11, r7,  r14
        @ NOTE(review): it/itt are Thumb-2 IT instructions; presumably
        @ asm.S turns them into no-ops when assembling for ARM mode.
        it              eq
        andeq           r14, r14, r14, \rnd #1  @ r14 = rounding bias
        add             r8,  r8,  r10
        add             r9,  r9,  r11
        ldr             r12, =0xfcfcfcfc >> 2
        itt             eq
        addeq           r8,  r8,  r14
        addeq           r9,  r9,  r14
        and             r4,  r12, r4,  lsr #2
        and             r5,  r12, r5,  lsr #2
        and             r6,  r12, r6,  lsr #2
        and             r7,  r12, r7,  lsr #2
        add             r10, r4,  r6
        add             r11, r5,  r7
        subs            r3,  r3,  #1
.endm

@ Full body of one xy2 variant: prime with the first row, then loop,
@ combining each row pair as (h1_a + h1_b + ((l1_a + l1_b) >> 2)),
@ i.e. per byte (a + b + c + d + bias) >> 2.  The stack is used to
@ carry the previous row's l1/h1 across the RND_XY2_IT register reuse.
@ Loops on the N/V flags from RND_XY2_IT's subs (bge), and returns.
.macro  RND_XY2_EXPAND align, rnd
        RND_XY2_IT      \align, \rnd
6:      push            {r8-r11}                @ save this row's l1/h1
        RND_XY2_IT      \align, \rnd
        pop             {r4-r7}                 @ previous row's l1 (r4,r5) / h1 (r6,r7)
        add             r4,  r4,  r8
        add             r5,  r5,  r9
        ldr             r14, =0x0f0f0f0f        @ mask for (l1 sum) >> 2
        add             r6,  r6,  r10
        add             r7,  r7,  r11
        and             r4,  r14, r4,  lsr #2
        and             r5,  r14, r5,  lsr #2
        add             r4,  r4,  r6
        add             r5,  r5,  r7
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bge             6b
        pop             {r4-r11, pc}
.endm

        .align 5
function ff_put_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Rounded 4-tap (x+y) half-pel put; lsl selects the 0x02020202
        @ rounding bias in RND_XY2_IT.
        pld             [r1]
        push            {r4-r11, lr}            @ R14 is also called LR
        JMP_ALIGN       r5,  r1
1:      RND_XY2_EXPAND  0, lsl                  @ alignment 0
        .align 5
2:      RND_XY2_EXPAND  1, lsl                  @ alignment 1
        .align 5
3:      RND_XY2_EXPAND  2, lsl                  @ alignment 2
        .align 5
4:      RND_XY2_EXPAND  3, lsl                  @ alignment 3
endfunc

        .align 5
function ff_put_no_rnd_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ No-round variant; lsr selects the 0x01010101 bias.
        pld             [r1]
        push            {r4-r11, lr}
        JMP_ALIGN       r5,  r1
1:      RND_XY2_EXPAND  0, lsr                  @ alignment 0
        .align 5
2:      RND_XY2_EXPAND  1, lsr                  @ alignment 1
        .align 5
3:      RND_XY2_EXPAND  2, lsr                  @ alignment 2
        .align 5
4:      RND_XY2_EXPAND  3, lsr                  @ alignment 3
endfunc

        .align 5
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
@ Adds an 8x8 block of signed 16-bit residuals to the bytes at dest,
@ clamping each result to [0, 255], four pixels per packed word.
@ Clamp trick per pixel: r6 = dest_byte + s.  If bit 8 of the sum is
@ set it is out of range, and ~s >> 24 yields the saturated value
@ (0xFF when s >= 0 overflowed high, 0x00 when s < 0 underflowed).
@ NOTE(review): the tst #0x100 test assumes the sum fits in 9 bits
@ (roughly -256..511, typical of IDCT output) — confirm against callers.
@ Interleaved loads/stores (the "moved from [X]" notes) hide load
@ latency; [A]..[F] mark each instruction's original position.
function ff_add_pixels_clamped_arm, export=1
        push            {r4-r10}
        mov             r10, #8                 @ row counter
1:
        ldr             r4,  [r1]               /* load dest */
        /* block[0] and block[1] */
        ldrsh           r5,  [r0]
        ldrsh           r7,  [r0, #2]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r5,  r6
        add             r8,  r7,  r8,  lsr #8
        mvn             r5,  r5                 @ ~s: saturation source
        mvn             r7,  r7
        tst             r6,  #0x100             @ out of 0..255?
        it              ne
        movne           r6,  r5,  lsr #24       @ clamp to 0x00/0xFF
        tst             r8,  #0x100
        it              ne
        movne           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #4]           /* moved from [A] */
        orr             r9,  r9,  r8,  lsl #8
        /* block[2] and block[3] */
        /* [A] */
        ldrsh           r7,  [r0, #6]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6,  lsr #16
        add             r8,  r7,  r8,  lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        it              ne
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        it              ne
        movne           r8,  r7,  lsr #24
        orr             r9,  r9,  r6,  lsl #16
        ldr             r4,  [r1, #4]           /* moved from [B] */
        orr             r9,  r9,  r8,  lsl #24
        /* store dest */
        ldrsh           r5,  [r0, #8]           /* moved from [C] */
        str             r9,  [r1]

        /* load dest */
        /* [B] */
        /* block[4] and block[5] */
        /* [C] */
        ldrsh           r7,  [r0, #10]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r5,  r6
        add             r8,  r7,  r8,  lsr #8
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        it              ne
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        it              ne
        movne           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #12]          /* moved from [D] */
        orr             r9,  r9,  r8,  lsl #8
        /* block[6] and block[7] */
        /* [D] */
        ldrsh           r7,  [r0, #14]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6,  lsr #16
        add             r8,  r7,  r8,  lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        it              ne
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        it              ne
        movne           r8,  r7,  lsr #24
        orr             r9,  r9,  r6,  lsl #16
        add             r0,  r0,  #16           /* moved from [E] */
        orr             r9,  r9,  r8,  lsl #24
        subs            r10, r10, #1            /* moved from [F] */
        /* store dest */
        str             r9,  [r1, #4]

        /* [E] */
        /* [F] */
        add             r1,  r1,  r2            @ next dest row
        bne             1b

        pop             {r4-r10}
        bx              lr
endfunc