@
@ ARMv4 optimized DSP utils
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
@ This file is part of FFmpeg.
@
@ FFmpeg is free software; you can redistribute it and/or
@ modify it under the terms of the GNU Lesser General Public
@ License as published by the Free Software Foundation; either
@ version 2.1 of the License, or (at your option) any later version.
@
@ FFmpeg is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
@ Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public
@ License along with FFmpeg; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@

#include "config.h"
#include "asm.S"

        preserve8

@ On cores without PLD (plain ARMv4) turn every pld below into a no-op
@ so the same code assembles everywhere.
#if !HAVE_PLD
.macro pld reg
.endm
#endif

#if HAVE_ARMV5TE
@ Prefetch loop: r0 = address, r1 = stride (bytes), r2 = number of lines.
@ Issues one pld per line, r1 bytes apart, until r2 reaches zero.
function ff_prefetch_arm, export=1
        subs    r2, r2, #1
        pld     [r0]
        add     r0, r0, r1
        bne     ff_prefetch_arm
        bx      lr
        .endfunc
#endif

@ Realign 16 bytes read from an unaligned source:
@ Rd0..Rd3 receive the 16 bytes that start \shift bytes into the
@ 20-byte window Rn0..Rn4 (little-endian byte order, \shift in 1..3).
.macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        mov \Rd0, \Rn0, lsr #(\shift * 8)
        mov \Rd1, \Rn1, lsr #(\shift * 8)
        mov \Rd2, \Rn2, lsr #(\shift * 8)
        mov \Rd3, \Rn3, lsr #(\shift * 8)
        orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
        orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
        orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
        orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm

@ In-place variant for 8 bytes: R0:R1 become the 8 bytes that start
@ \shift bytes into the 12-byte window R0:R1:R2.
.macro ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
        mov \R0, \R0, lsr #(\shift * 8)
        orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
        mov \R1, \R1, lsr #(\shift * 8)
        orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm

@ Same 8-byte realignment, but with separate destination registers so
@ the source window Rsrc0..Rsrc2 stays intact for further shifts.
.macro ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
        mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
        orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
        orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm

@ Packed byte-wise average of two 8-byte values, rounding up.
.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
        @ Rmask = 0xFEFEFEFE
        @ Rn = destroy
        eor \Rd0, \Rn0, \Rm0
        eor \Rd1, \Rn1, \Rm1
        orr \Rn0, \Rn0, \Rm0
        orr \Rn1, \Rn1, \Rm1
        and \Rd0, \Rd0, \Rmask
        and \Rd1, \Rd1, \Rmask
        sub \Rd0, \Rn0, \Rd0, lsr #1
        sub \Rd1, \Rn1, \Rd1, lsr #1
.endm

@ Packed byte-wise average of two 8-byte values, rounding down.
.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
        @ Rmask = 0xFEFEFEFE
        @ Rn = destroy
        eor \Rd0, \Rn0, \Rm0
        eor \Rd1, \Rn1, \Rm1
        and \Rn0, \Rn0, \Rm0
        and \Rn1, \Rn1, \Rm1
        and \Rd0, \Rd0, \Rmask
        and \Rd1, \Rd1, \Rmask
        add \Rd0, \Rn0, \Rd0, lsr #1
        add \Rd1, \Rn1, \Rd1, lsr #1
.endm

@ Dispatch on source alignment: \tmp = \reg & 3, \reg is rounded down to
@ a word boundary, then control branches to local label 1/2/3/4 in the
@ invoking function (alignment 0 -> 1:, 1 -> 2:, 2 -> 3:, 3 -> 4:).
.macro JMP_ALIGN tmp, reg
        ands \tmp, \reg, #3
        bic  \reg, \reg, #3
        beq  1f
        subs \tmp, \tmp, #1
        beq  2f
        subs \tmp, \tmp, #1
        beq  3f
        b    4f
.endm

@ ----------------------------------------------------------------
        .align 5
function put_pixels16_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Copies a 16xh block; one loop per source alignment (see JMP_ALIGN).
        pld [r1]
        stmfd sp!, {r4-r11, lr} @ R14 is also called LR
        JMP_ALIGN r5, r1
1:      @ alignment 0: straight word copy
        ldmia r1, {r4-r7}
        add r1, r1, r2
        stmia r0, {r4-r7}
        pld [r1]
        subs r3, r3, #1
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r11, pc}
        .align 5
2:      @ alignment 1: load 20 bytes, realign by 1
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r11, pc}
        .align 5
3:      @ alignment 2
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r11, pc}
        .align 5
4:      @ alignment 3
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r11,pc}
        .endfunc

@ ----------------------------------------------------------------
        .align 5
function put_pixels8_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Copies an 8xh block; one loop per source alignment.
        pld [r1]
        stmfd sp!, {r4-r5,lr} @ R14 is also called LR
        JMP_ALIGN r5, r1
1:      @ alignment 0
        ldmia r1, {r4-r5}
        add r1, r1, r2
        subs r3, r3, #1
        pld [r1]
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r5,pc}
        .align 5
2:      @ alignment 1: load 12 bytes, realign by 1
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r5,pc}
        .align 5
3:      @ alignment 2
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r5,pc}
        .align 5
4:      @ alignment 3
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r5,pc}
        .endfunc

@ ----------------------------------------------------------------
        .align 5
function put_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Half-pel horizontal interpolation: averages pixels[x] with
        @ pixels[x+1], rounding up.  r12 keeps the 0xFEFEFEFE mask.
        pld [r1]
        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:      @ alignment 0: avg(bytes at +0, bytes at +1)
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r10,pc}
        .align 5
2:      @ alignment 1: avg(shift-1, shift-2)
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r10,pc}
        .align 5
3:      @ alignment 2: avg(shift-2, shift-3)
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r10,pc}
        .align 5
4:      @ alignment 3: avg(shift-3, following word pair)
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld [r1]
        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
        .endfunc

        .align 5
function put_no_rnd_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Same as put_pixels8_x2_arm but averages rounding down.
        pld [r1]
        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:      @ alignment 0
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r10,pc}
        .align 5
2:      @ alignment 1
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r10,pc}
        .align 5
3:      @ alignment 2
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r10,pc}
        .align 5
4:      @ alignment 3
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
        .endfunc


@ ----------------------------------------------------------------
        .align 5
function put_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Half-pel vertical interpolation: each output row is the rounded
        @ average of two consecutive source rows.  Two output rows are
        @ produced per loop iteration, hence h is halved up front.
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        mov r3, r3, lsr #1
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:      @ alignment 0: prime r4-r5 with the first row, then ping-pong
        @ between r4-r5 and r6-r7 so each row is loaded only once.
        ldmia r1, {r4-r5}
        add r1, r1, r2
6:      ldmia r1, {r6-r7}
        add r1, r1, r2
        pld [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia r1, {r4-r5}
        add r1, r1, r2
        stmia r0, {r8-r9}
        add r0, r0, r2
        pld [r1]
        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
2:      @ alignment 1
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
3:      @ alignment 2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
4:      @ alignment 3
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .endfunc

        .align 5
function put_no_rnd_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Same as put_pixels8_y2_arm but averages rounding down.
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        mov r3, r3, lsr #1
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:      @ alignment 0
        ldmia r1, {r4-r5}
        add r1, r1, r2
6:      ldmia r1, {r6-r7}
        add r1, r1, r2
        pld [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia r1, {r4-r5}
        add r1, r1, r2
        stmia r0, {r8-r9}
        add r0, r0, r2
        pld [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
2:      @ alignment 1
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
3:      @ alignment 2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
4:      @ alignment 3
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .endfunc

        @ Flush the literal pool (0xfefefefe etc.) so later ldr =constant
        @ references stay within pc-relative range.
        .ltorg

@ ----------------------------------------------------------------
@ One source-row step of the half-pel xy interpolation.  Loads a row
@ (plus the word needed for realignment), builds the two horizontally
@ adjacent 8-byte values a and b in r4:r5 / r6:r7, then splits each byte
@ into low 2 bits and high 6 bits so four pixels can be summed without
@ cross-byte carries:
.macro RND_XY2_IT align, rnd
        @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
.if \align == 0
        ldmia r1, {r6-r8}
.elseif \align == 3
        ldmia r1, {r5-r7}
.else
        ldmia r1, {r8-r10}
.endif
        add r1, r1, r2
        pld [r1]
.if \align == 0
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
.elseif \align == 1
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
.elseif \align == 2
        ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
.elseif \align == 3
        ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
.endif
        ldr r14, =0x03030303
        tst r3, #1
        and r8, r4, r14
        and r9, r5, r14
        and r10, r6, r14
        and r11, r7, r14
        @ When the remaining row count is even, turn r14 into the rounding
        @ constant: lsl gives 0x03030303 & 0x06060606 = 0x02020202 (rnd),
        @ lsr gives 0x03030303 & 0x01818181 = 0x01010101 (no_rnd), added
        @ below so the constant enters the sum once per output row.
        andeq r14, r14, r14, \rnd #1
        add r8, r8, r10
        add r9, r9, r11
        ldr r12, =0xfcfcfcfc >> 2
        addeq r8, r8, r14
        addeq r9, r9, r14
        and r4, r12, r4, lsr #2
        and r5, r12, r5, lsr #2
        and r6, r12, r6, lsr #2
        and r7, r12, r7, lsr #2
        add r10, r4, r6
        add r11, r5, r7
        subs r3, r3, #1
.endm

@ Full xy2 loop body for one alignment: combines the low-bit and
@ high-bit partial sums of two consecutive rows (the previous row's
@ sums are parked on the stack), producing
@ out = ((l1a + l1b) >> 2 & 0x0f0f0f0f) + (h1a + h1b) per iteration.
@ Loops while the row counter from RND_XY2_IT stays >= 0, then returns.
.macro RND_XY2_EXPAND align, rnd
        RND_XY2_IT \align, \rnd
6:      stmfd sp!, {r8-r11}
        RND_XY2_IT \align, \rnd
        ldmfd sp!, {r4-r7}
        add r4, r4, r8
        add r5, r5, r9
        ldr r14, =0x0f0f0f0f
        add r6, r6, r10
        add r7, r7, r11
        and r4, r14, r4, lsr #2
        and r5, r14, r5, lsr #2
        add r4, r4, r6
        add r5, r5, r7
        stmia r0, {r4-r5}
        add r0, r0, r2
        bge 6b
        ldmfd sp!, {r4-r11,pc}
.endm

        .align 5
function put_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Half-pel diagonal interpolation, rounding up (rnd = lsl).
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        JMP_ALIGN r5, r1
1:
        RND_XY2_EXPAND 0, lsl

        .align 5
2:
        RND_XY2_EXPAND 1, lsl

        .align 5
3:
        RND_XY2_EXPAND 2, lsl

        .align 5
4:
        RND_XY2_EXPAND 3, lsl
        .endfunc

        .align 5
function put_no_rnd_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @ Half-pel diagonal interpolation, rounding down (rnd = lsr).
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        JMP_ALIGN r5, r1
1:
        RND_XY2_EXPAND 0, lsr

        .align 5
2:
        RND_XY2_EXPAND 1, lsr

        .align 5
3:
        RND_XY2_EXPAND 2, lsr

        .align 5
4:
        RND_XY2_EXPAND 3, lsr
        .endfunc

        .align 5
@ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
@ Adds an 8x8 block of 16-bit coefficients to the 8x8 byte block at
@ dest, clamping each result to 0..255.  Clamp trick: after
@ sum = dest_byte + coeff, bit 8 of sum flags over-/underflow; in that
@ case ~coeff >> 24 yields the saturated value (0xFF for a positive
@ coefficient, 0x00 for a negative one).  Loads/adds are interleaved
@ (the "moved from [X]" notes) to hide load-use latency.
function ff_add_pixels_clamped_ARM, export=1
        push {r4-r10}
        mov r10, #8             @ 8 rows
1:
        ldr r4, [r1]            /* load dest */
        /* block[0] and block[1]*/
        ldrsh r5, [r0]
        ldrsh r7, [r0, #2]
        and r6, r4, #0xFF
        and r8, r4, #0xFF00
        add r6, r5, r6
        add r8, r7, r8, lsr #8
        mvn r5, r5
        mvn r7, r7
        tst r6, #0x100
        movne r6, r5, lsr #24
        tst r8, #0x100
        movne r8, r7, lsr #24
        mov r9, r6
        ldrsh r5, [r0, #4] /* moved from [A] */
        orr r9, r9, r8, lsl #8
        /* block[2] and block[3] */
        /* [A] */
        ldrsh r7, [r0, #6]
        and r6, r4, #0xFF0000
        and r8, r4, #0xFF000000
        add r6, r5, r6, lsr #16
        add r8, r7, r8, lsr #24
        mvn r5, r5
        mvn r7, r7
        tst r6, #0x100
        movne r6, r5, lsr #24
        tst r8, #0x100
        movne r8, r7, lsr #24
        orr r9, r9, r6, lsl #16
        ldr r4, [r1, #4] /* moved from [B] */
        orr r9, r9, r8, lsl #24
        /* store dest */
        ldrsh r5, [r0, #8] /* moved from [C] */
        str r9, [r1]

        /* load dest */
        /* [B] */
        /* block[4] and block[5] */
        /* [C] */
        ldrsh r7, [r0, #10]
        and r6, r4, #0xFF
        and r8, r4, #0xFF00
        add r6, r5, r6
        add r8, r7, r8, lsr #8
        mvn r5, r5
        mvn r7, r7
        tst r6, #0x100
        movne r6, r5, lsr #24
        tst r8, #0x100
        movne r8, r7, lsr #24
        mov r9, r6
        ldrsh r5, [r0, #12] /* moved from [D] */
        orr r9, r9, r8, lsl #8
        /* block[6] and block[7] */
        /* [D] */
        ldrsh r7, [r0, #14]
        and r6, r4, #0xFF0000
        and r8, r4, #0xFF000000
        add r6, r5, r6, lsr #16
        add r8, r7, r8, lsr #24
        mvn r5, r5
        mvn r7, r7
        tst r6, #0x100
        movne r6, r5, lsr #24
        tst r8, #0x100
        movne r8, r7, lsr #24
        orr r9, r9, r6, lsl #16
        add r0, r0, #16 /* moved from [E] */
        orr r9, r9, r8, lsl #24
        subs r10, r10, #1 /* moved from [F] */
        /* store dest */
        str r9, [r1, #4]

        /* [E] */
        /* [F] */
        add r1, r1, r2
        bne 1b

        pop {r4-r10}
        bx lr
        .endfunc