/*
 * Copyright (c) 2014 RISC OS Open Ltd
 * Author: Ben Avison <bavison@riscosopen.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

// Overview: this file generates one routine per combination of
// (in-order / out-of-order channel assignment) x (2, 6 or 8 channels) x
// (fixed output shift 0..5 / mixed per-channel shift).  All routines are
// produced by the implement_pack macro, instantiated at the end of the file.
// Every routine tail-calls ff_mlp_pack_output (described by the existing
// comments as the C implementation) when the sample count is not a multiple
// of the number of samples handled per loop iteration.

// loadregoffsh2 group, index, base, offgroup, offindex
//   Emits:  ldr <group><index>, [<base>, <offgroup><offindex>, lsl #2]
//   index and offindex may be assemble-time expressions: .altmacro mode is
//   enabled so that %(expr) is replaced by its numeric value before the
//   number is pasted onto the register-name prefix by loadregoffsh2_.
.macro loadregoffsh2 group, index, base, offgroup, offindex
       .altmacro
       loadregoffsh2_ \group, %(\index), \base, \offgroup, %(\offindex)
       .noaltmacro
.endm

// Worker for loadregoffsh2: index/offindex are already plain numbers here.
.macro loadregoffsh2_ group, index, base, offgroup, offindex
        ldr     \group\index, [\base, \offgroup\offindex, lsl #2]
.endm

// eorlslreg check, data, group, index
//   Emits:  eor <check>, <check>, <data>, lsl <group><index>
//   i.e. the shift amount comes from a register whose name is built from
//   the prefix "group" and the evaluated expression "index".
.macro eorlslreg check, data, group, index
       .altmacro
       eorlslreg_ \check, \data, \group, %(\index)
       .noaltmacro
.endm

// Worker for eorlslreg: index is already a plain number here.
.macro eorlslreg_ check, data, group, index
        eor     \check, \check, \data, lsl \group\index
.endm

// decr_modulo var, by, modulus
//   Assemble-time counter update: var -= by, and when var reaches 0 it is
//   reset to modulus.  No instructions are emitted.
.macro decr_modulo var, by, modulus
 .set \var, \var - \by
 .if \var == 0
 .set \var, \modulus
 .endif
.endm

// load_group1 size, channels, r0, r1, r2, r3, pointer_dead=0
//   Loads the first group of words for a 4-word output block from IN.
//     size == 2 : LDRD into r0/r1, post-incrementing IN by
//                 (size + 8 - channels) words.
//     size == 4 : LDM into r0-r3.  When IDX1 > 4 or channels == 8 the
//                 writeback form is used (IN advances by 4 words);
//                 otherwise IN is advanced by (4 + 8 - channels) words with
//                 a separate ADD, which is omitted when pointer_dead is
//                 non-zero.
//   Finally IDX1 (assemble-time count of channels still to be loaded for the
//   current sample) is decremented by size, modulo channels.
//   NOTE(review): the constant 8 looks like the length in words of one
//   sample row in the input buffer - confirm against the C prototype.
 .macro load_group1  size, channels, r0, r1, r2, r3, pointer_dead=0
  .if \size == 2
        ldrd    \r0, \r1, [IN], #(\size + 8 - \channels) * 4
  .else // size == 4
   .if IDX1 > 4 || \channels==8
        ldm     IN!, {\r0, \r1, \r2, \r3}
   .else
        ldm     IN, {\r0, \r1, \r2, \r3}
    .if !\pointer_dead
        add     IN, IN, #(4 + 8 - \channels) * 4
    .endif
   .endif
  .endif
        decr_modulo IDX1, \size, \channels
 .endm

// load_group2 size, channels, r0, r1, r2, r3, pointer_dead=0
//   Loads the second group of words (into r2/r3) when the first group only
//   supplied two.  Nothing is loaded unless size == 2.
//     IDX1 > 2 : LDM with writeback (IN advances by 2 words).
//     else     : LDRD, post-incrementing IN by (size + 8 - channels) words.
//   IDX1 is then decremented by size, modulo channels (a no-op decrement
//   when size == 0).
//   The lines marked //A are a disabled alternative that special-cases
//   r2 == ip; they are kept as comments exactly as found.
 .macro load_group2  size, channels, r0, r1, r2, r3, pointer_dead=0
  .if \size == 2
   .if IDX1 > 2
        ldm     IN!, {\r2, \r3}
   .else
//A   .ifc \r2, ip
//A    .if \pointer_dead
//A       ldm     IN, {\r2, \r3}
//A    .else
//A       ldr     \r2, [IN], #4
//A       ldr     \r3, [IN], #(\size - 1 + 8 - \channels) * 4
//A    .endif
//A   .else
        ldrd    \r2, \r3, [IN], #(\size + 8 - \channels) * 4
//A   .endif
   .endif
  .endif
        decr_modulo IDX1, \size, \channels
 .endm

// implement_pack inorder, channels, shift
//   Generates one packing routine.
//     inorder  : 1 -> ff_mlp_pack_output_inorder_*    (channels read in order)
//                0 -> ff_mlp_pack_output_outoforder_* (channels read through
//                     the ch_assign table); no code is generated for
//                     inorder == 0 with shift == mixed.
//     channels : 2, 6 or 8 (the values instantiated below).
//     shift    : a number (fixed left shift applied to every word) or the
//                word "mixed" (per-channel shifts read from output_shift).
//   Register use common to all variants (aliases defined per variant):
//     CHECK = a1, COUNT = a2, IN = a3, OUT = a4; v1-v6, sl, fp and lr are
//     pushed on entry and restored on exit.
//   NOTE(review): CHECK is left in a1 on return, presumably as the updated
//   check value returned to the caller - confirm against the C prototype.
.macro implement_pack inorder, channels, shift
.if \inorder
.ifc \shift, mixed

// ---- In-order channels, per-channel (mixed) output shift -------------------

CHECK   .req    a1
COUNT   .req    a2
IN      .req    a3
OUT     .req    a4
DAT0    .req    v1
DAT1    .req    v2
DAT2    .req    v3
DAT3    .req    v4
SHIFT0  .req    v5
SHIFT1  .req    v6
SHIFT2  .req    sl
SHIFT3  .req    fp
SHIFT4  .req    ip
SHIFT5  .req    lr

// output4words (mixed shift): load four words, shift each left by the shift
// register belonging to its channel, fold each into CHECK and store the four
// words to OUT.  IDX1 tracks the load position and IDX2 the checksum
// position within the current sample (both count down from channels).
// Each word is folded into CHECK shifted right by (8 - n), where
// n = channels - IDX2 (+1 for the second word of a pair) is its channel
// number within the sample.
 .macro output4words
  .set SIZE_GROUP1, IDX1
  .if SIZE_GROUP1 > 4
   .set SIZE_GROUP1, 4
  .endif
  .set SIZE_GROUP2, 4 - SIZE_GROUP1
        load_group1  SIZE_GROUP1, \channels, DAT0, DAT1, DAT2, DAT3
        load_group2  SIZE_GROUP2, \channels, DAT0, DAT1, DAT2, DAT3
  .if \channels == 2
        // two samples per block: the same pair of shifts is used twice
        lsl     DAT0, SHIFT0
        lsl     DAT1, SHIFT1
        lsl     DAT2, SHIFT0
        lsl     DAT3, SHIFT1
  .elseif \channels == 6
        // the block can start at channel 0, 4 or 2 of a sample
   .if IDX2 == 6
        lsl     DAT0, SHIFT0
        lsl     DAT1, SHIFT1
        lsl     DAT2, SHIFT2
        lsl     DAT3, SHIFT3
   .elseif IDX2 == 2
        lsl     DAT0, SHIFT4
        lsl     DAT1, SHIFT5
        lsl     DAT2, SHIFT0
        lsl     DAT3, SHIFT1
   .else // IDX2 == 4
        lsl     DAT0, SHIFT2
        lsl     DAT1, SHIFT3
        lsl     DAT2, SHIFT4
        lsl     DAT3, SHIFT5
   .endif
  .elseif \channels == 8
        // shifts stay packed one per byte in SHIFT4 (first half of the
        // sample) and SHIFT5 (second half); unpack the four needed now
   .if IDX2 == 8
        uxtb    SHIFT0, SHIFT4, ror #0
        uxtb    SHIFT1, SHIFT4, ror #8
        uxtb    SHIFT2, SHIFT4, ror #16
        uxtb    SHIFT3, SHIFT4, ror #24
   .else
        uxtb    SHIFT0, SHIFT5, ror #0
        uxtb    SHIFT1, SHIFT5, ror #8
        uxtb    SHIFT2, SHIFT5, ror #16
        uxtb    SHIFT3, SHIFT5, ror #24
   .endif
        lsl     DAT0, SHIFT0
        lsl     DAT1, SHIFT1
        lsl     DAT2, SHIFT2
        lsl     DAT3, SHIFT3
  .endif
        eor     CHECK, CHECK, DAT0, lsr #8 - (\channels - IDX2)
        eor     CHECK, CHECK, DAT1, lsr #7 - (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        eor     CHECK, CHECK, DAT2, lsr #8 - (\channels - IDX2)
        eor     CHECK, CHECK, DAT3, lsr #7 - (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        stm     OUT!, {DAT0 - DAT3}
 .endm

 .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 4)
 .if (WORDS_PER_LOOP % 2) == 0
  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
 .endif
 .if (WORDS_PER_LOOP % 2) == 0
  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
 .endif
 .set WORDS_PER_LOOP, WORDS_PER_LOOP * 4
 .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels

// ff_mlp_pack_output_inorder_<channels>ch_mixedshift_armv6
//   In:    a1 = CHECK, a2 = COUNT (samples), a3 = IN, a4 = OUT,
//          [sp, #4] at entry = pointer to the output_shift bytes
//          (read at [sp, #(9+1)*4] after nine registers are pushed)
//   Out:   a1 = CHECK with every stored word folded in
//   Clobb: a2-a4, ip, flags
//   Returns immediately when COUNT is 0.
function ff_mlp_pack_output_inorder_\channels\()ch_mixedshift_armv6, export=1
 .if SAMPLES_PER_LOOP > 1
        tst     COUNT, #SAMPLES_PER_LOOP - 1  // always seems to be in practice
        it      ne
        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
 .endif
        teq     COUNT, #0
        it      eq
        bxeq    lr
        push    {v1-v6,sl,fp,lr}
        ldr     SHIFT0, [sp, #(9+1)*4]        // get output_shift from stack
        ldr     SHIFT1, =0x08080808
        ldr     SHIFT4, [SHIFT0]
 .if \channels == 2
        uadd8   SHIFT4, SHIFT4, SHIFT1        // increase all shifts by 8
        uxtb    SHIFT0, SHIFT4, ror #0
        uxtb    SHIFT1, SHIFT4, ror #8
 .else
        ldr     SHIFT5, [SHIFT0, #4]
        uadd8   SHIFT4, SHIFT4, SHIFT1        // increase all shifts by 8
        uadd8   SHIFT5, SHIFT5, SHIFT1
  .if \channels == 6
        // six shifts fit in SHIFT0-SHIFT5, so unpack them once up front
        uxtb    SHIFT0, SHIFT4, ror #0
        uxtb    SHIFT1, SHIFT4, ror #8
        uxtb    SHIFT2, SHIFT4, ror #16
        uxtb    SHIFT3, SHIFT4, ror #24
        uxtb    SHIFT4, SHIFT5, ror #0
        uxtb    SHIFT5, SHIFT5, ror #8
  .endif
 .endif
 .set IDX1, \channels
 .set IDX2, \channels
0:
 .rept WORDS_PER_LOOP / 4
        output4words
 .endr
        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
        bne     0b
        pop     {v1-v6,sl,fp,pc}
        .ltorg
endfunc
 .purgem output4words

        .unreq  CHECK
        .unreq  COUNT
        .unreq  IN
        .unreq  OUT
        .unreq  DAT0
        .unreq  DAT1
        .unreq  DAT2
        .unreq  DAT3
        .unreq  SHIFT0
        .unreq  SHIFT1
        .unreq  SHIFT2
        .unreq  SHIFT3
        .unreq  SHIFT4
        .unreq  SHIFT5

.else // not mixed

// ---- In-order channels, fixed output shift ---------------------------------

CHECK   .req    a1
COUNT   .req    a2
IN      .req    a3
OUT     .req    a4
DAT0    .req    v1
DAT1    .req    v2
DAT2    .req    v3
DAT3    .req    v4
DAT4    .req    v5
DAT5    .req    v6
DAT6    .req    sl // use these rather than the otherwise unused
DAT7    .req    fp // ip and lr so that we can load them using LDRD

// output4words tail, head, r0-r3, r4-r7, pointer_dead=0 (fixed shift)
//   Two-stage block, interleaved so loads and checksum work alternate:
//     head != 0 : load the next four words into r0-r3 and shift them left
//                 by 8 + shift.
//     tail != 0 : fold the four words already held in r4-r7 into CHECK and
//                 store them to OUT.
//   Callers alternate the two register banks (DAT0-3 / DAT4-7) so that the
//   tail of one call consumes what the head of the previous call produced.
//   pointer_dead is passed through to the load macros.
 .macro output4words  tail, head, r0, r1, r2, r3, r4, r5, r6, r7, pointer_dead=0
  .if \head
   .set SIZE_GROUP1, IDX1
   .if SIZE_GROUP1 > 4
    .set SIZE_GROUP1, 4
   .endif
   .set SIZE_GROUP2, 4 - SIZE_GROUP1
        load_group1  SIZE_GROUP1, \channels, \r0, \r1, \r2, \r3, \pointer_dead
  .endif
  .if \tail
        eor     CHECK, CHECK, \r4, lsr #8 - (\channels - IDX2)
        eor     CHECK, CHECK, \r5, lsr #7 - (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
  .endif
  .if \head
        load_group2  SIZE_GROUP2, \channels, \r0, \r1, \r2, \r3, \pointer_dead
  .endif
  .if \tail
        eor     CHECK, CHECK, \r6, lsr #8 - (\channels - IDX2)
        eor     CHECK, CHECK, \r7, lsr #7 - (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        stm     OUT!, {\r4, \r5, \r6, \r7}
  .endif
  .if \head
        lsl     \r0, #8 + \shift
        lsl     \r1, #8 + \shift
        lsl     \r2, #8 + \shift
        lsl     \r3, #8 + \shift
  .endif
 .endm

 .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 8)
 .if (WORDS_PER_LOOP % 2) == 0
  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
 .endif
 .if (WORDS_PER_LOOP % 2) == 0
  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
 .endif
 .if (WORDS_PER_LOOP % 2) == 0
  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
 .endif
 .set WORDS_PER_LOOP, WORDS_PER_LOOP * 8
 .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels

// ff_mlp_pack_output_inorder_<channels>ch_<shift>shift_armv6
//   In:    a1 = CHECK, a2 = COUNT (samples), a3 = IN, a4 = OUT
//   Out:   a1 = CHECK with every stored word folded in
//   Clobb: a2-a4, flags
//   COUNT is pre-decremented by SAMPLES_PER_LOOP; the routine returns
//   immediately if that borrows (COUNT was smaller than one iteration).
//   Structure: one head-only block primes the pipeline, the loop at 0:
//   runs while more than one iteration remains, and the code at 1: handles
//   the final iteration, ending with a tail-only block.
function ff_mlp_pack_output_inorder_\channels\()ch_\shift\()shift_armv6, export=1
 .if SAMPLES_PER_LOOP > 1
        tst     COUNT, #SAMPLES_PER_LOOP - 1  // always seems to be in practice
        it      ne
        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
 .endif
        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
        it      lo
        bxlo    lr
        push    {v1-v6,sl,fp,lr}
 .set IDX1, \channels
 .set IDX2, \channels
        output4words 0, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
        // the block above emits no flag-setting instructions, so on first
        // entry the BEQ below still tests the result of the SUBS above
0:      beq     1f
 .rept WORDS_PER_LOOP / 8
        output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3
        output4words 1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
 .endr
        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
        bne     0b
1:
 .rept WORDS_PER_LOOP / 8 - 1
        output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3
        output4words 1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
 .endr
        // last load of the routine: IN is not needed afterwards
        output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3, pointer_dead=1
        // drain the pipeline: checksum and store only
        output4words 1, 0, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
        pop     {v1-v6,sl,fp,pc}
endfunc
 .purgem output4words

        .unreq  CHECK
        .unreq  COUNT
        .unreq  IN
        .unreq  OUT
        .unreq  DAT0
        .unreq  DAT1
        .unreq  DAT2
        .unreq  DAT3
        .unreq  DAT4
        .unreq  DAT5
        .unreq  DAT6
        .unreq  DAT7

.endif // mixed
.else // not inorder
.ifc \shift, mixed

// This case not currently handled

.else // not mixed

// ---- Out-of-order channels (via ch_assign), fixed output shift -------------
// NOTE(review): presumably excluded from Thumb builds because of the
// register-specified shift operands (eor ..., lsl/lsr Rn) used below - confirm.

#if !CONFIG_THUMB

CHECK   .req    a1
COUNT   .req    a2
IN      .req    a3
OUT     .req    a4
DAT0    .req    v1
DAT1    .req    v2
DAT2    .req    v3
DAT3    .req    v4
CHAN0   .req    v5
CHAN1   .req    v6
CHAN2   .req    sl
CHAN3   .req    fp
CHAN4   .req    ip
CHAN5   .req    lr

// output4words (out of order): load four words from IN indexed by the
// channel numbers held in the CHAN registers, shift them, fold them into
// CHECK and store them to OUT.  IN is advanced by 8 words each time the
// last channel of a sample has been loaded.
 .macro output4words
  .if \channels == 8
        // channel numbers stay packed one per byte in CHAN4 (first half of
        // the sample) and CHAN5 (second half); unpack the four needed now
   .if IDX1 == 8
        uxtb    CHAN0, CHAN4, ror #0
        uxtb    CHAN1, CHAN4, ror #8
        uxtb    CHAN2, CHAN4, ror #16
        uxtb    CHAN3, CHAN4, ror #24
   .else
        uxtb    CHAN0, CHAN5, ror #0
        uxtb    CHAN1, CHAN5, ror #8
        uxtb    CHAN2, CHAN5, ror #16
        uxtb    CHAN3, CHAN5, ror #24
   .endif
        ldr     DAT0, [IN, CHAN0, lsl #2]
        ldr     DAT1, [IN, CHAN1, lsl #2]
        ldr     DAT2, [IN, CHAN2, lsl #2]
        ldr     DAT3, [IN, CHAN3, lsl #2]
   .if IDX1 == 4
        add     IN, IN, #8*4
   .endif
        decr_modulo IDX1, 4, \channels
  .else
   .set SIZE_GROUP1, IDX1
   .if SIZE_GROUP1 > 4
    .set SIZE_GROUP1, 4
   .endif
   .set SIZE_GROUP2, 4 - SIZE_GROUP1
   .if SIZE_GROUP1 == 2
        loadregoffsh2  DAT, 0, IN, CHAN, 0 + (\channels - IDX1)
        loadregoffsh2  DAT, 1, IN, CHAN, 1 + (\channels - IDX1)
        add     IN, IN, #8*4
   .else // SIZE_GROUP1 == 4
        loadregoffsh2  DAT, 0, IN, CHAN, 0 + (\channels - IDX1)
        loadregoffsh2  DAT, 1, IN, CHAN, 1 + (\channels - IDX1)
        loadregoffsh2  DAT, 2, IN, CHAN, 2 + (\channels - IDX1)
        loadregoffsh2  DAT, 3, IN, CHAN, 3 + (\channels - IDX1)
    .if IDX1 == 4
        add     IN, IN, #8*4
    .endif
   .endif
        decr_modulo IDX1, SIZE_GROUP1, \channels
   .if SIZE_GROUP2 == 2
        loadregoffsh2  DAT, 2, IN, CHAN, 0 + (\channels - IDX1)
        loadregoffsh2  DAT, 3, IN, CHAN, 1 + (\channels - IDX1)
    .if IDX1 == 2
        add     IN, IN, #8*4
    .endif
   .endif
        decr_modulo IDX1, SIZE_GROUP2, \channels
  .endif
  .if \channels == 8 // in this case we can corrupt CHAN0-3
        // CHANn := 8 - CHANn, used as the right-shift amount when folding
        // the already left-shifted word into CHECK
        rsb     CHAN0, CHAN0, #8
        rsb     CHAN1, CHAN1, #8
        rsb     CHAN2, CHAN2, #8
        rsb     CHAN3, CHAN3, #8
        lsl     DAT0, #8 + \shift
        lsl     DAT1, #8 + \shift
        lsl     DAT2, #8 + \shift
        lsl     DAT3, #8 + \shift
        eor     CHECK, CHECK, DAT0, lsr CHAN0
        eor     CHECK, CHECK, DAT1, lsr CHAN1
        eor     CHECK, CHECK, DAT2, lsr CHAN2
        eor     CHECK, CHECK, DAT3, lsr CHAN3
  .else
        // CHAN registers must be preserved here, so clear the top byte,
        // fold in with a left shift by the channel number, and only then
        // move the word up by 8 for storing
   .if \shift != 0
        lsl     DAT0, #\shift
        lsl     DAT1, #\shift
        lsl     DAT2, #\shift
        lsl     DAT3, #\shift
   .endif
        bic     DAT0, DAT0, #0xff000000
        bic     DAT1, DAT1, #0xff000000
        bic     DAT2, DAT2, #0xff000000
        bic     DAT3, DAT3, #0xff000000
        eorlslreg CHECK, DAT0, CHAN, 0 + (\channels - IDX2)
        eorlslreg CHECK, DAT1, CHAN, 1 + (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        eorlslreg CHECK, DAT2, CHAN, 0 + (\channels - IDX2)
        eorlslreg CHECK, DAT3, CHAN, 1 + (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        lsl     DAT0, #8
        lsl     DAT1, #8
        lsl     DAT2, #8
        lsl     DAT3, #8
  .endif
        stm     OUT!, {DAT0 - DAT3}
 .endm

 .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 4)
 .if (WORDS_PER_LOOP % 2) == 0
  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
 .endif
 .if (WORDS_PER_LOOP % 2) == 0
  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
 .endif
 .set WORDS_PER_LOOP, WORDS_PER_LOOP * 4
 .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels

// ff_mlp_pack_output_outoforder_<channels>ch_<shift>shift_armv6
//   In:    a1 = CHECK, a2 = COUNT (samples), a3 = IN, a4 = OUT,
//          [sp, #0] at entry = pointer to the ch_assign bytes
//          (read at [sp, #(9+0)*4] after nine registers are pushed)
//   Out:   a1 = CHECK with every stored word folded in
//   Clobb: a2-a4, ip, flags
//   Returns immediately when COUNT is 0.
function ff_mlp_pack_output_outoforder_\channels\()ch_\shift\()shift_armv6, export=1
 .if SAMPLES_PER_LOOP > 1
        tst     COUNT, #SAMPLES_PER_LOOP - 1  // always seems to be in practice
        it      ne
        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
 .endif
        teq     COUNT, #0
        it      eq
        bxeq    lr
        push    {v1-v6,sl,fp,lr}
        ldr     CHAN0, [sp, #(9+0)*4]         // get ch_assign from stack
        ldr     CHAN4, [CHAN0]
 .if \channels == 2
        uxtb    CHAN0, CHAN4, ror #0
        uxtb    CHAN1, CHAN4, ror #8
 .else
        ldr     CHAN5, [CHAN0, #4]
  .if \channels == 6
        // six channel numbers fit in CHAN0-CHAN5, so unpack them once
        uxtb    CHAN0, CHAN4, ror #0
        uxtb    CHAN1, CHAN4, ror #8
        uxtb    CHAN2, CHAN4, ror #16
        uxtb    CHAN3, CHAN4, ror #24
        uxtb    CHAN4, CHAN5, ror #0
        uxtb    CHAN5, CHAN5, ror #8
  .endif
 .endif
 .set IDX1, \channels
 .set IDX2, \channels
0:
 .rept WORDS_PER_LOOP / 4
        output4words
 .endr
        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
        bne     0b
        pop     {v1-v6,sl,fp,pc}
        .ltorg
endfunc
 .purgem output4words

        .unreq  CHECK
        .unreq  COUNT
        .unreq  IN
        .unreq  OUT
        .unreq  DAT0
        .unreq  DAT1
        .unreq  DAT2
        .unreq  DAT3
        .unreq  CHAN0
        .unreq  CHAN1
        .unreq  CHAN2
        .unreq  CHAN3
        .unreq  CHAN4
        .unreq  CHAN5

#endif // !CONFIG_THUMB

.endif // mixed
.endif // inorder
.endm // implement_pack

// pack_channels inorder, channels
//   Instantiates implement_pack for fixed shifts 0..5 and for mixed shift.
.macro pack_channels inorder, channels
        implement_pack \inorder, \channels, 0
        implement_pack \inorder, \channels, 1
        implement_pack \inorder, \channels, 2
        implement_pack \inorder, \channels, 3
        implement_pack \inorder, \channels, 4
        implement_pack \inorder, \channels, 5
        implement_pack \inorder, \channels, mixed
.endm

// pack_order inorder
//   Instantiates pack_channels for 2, 6 and 8 channels.
.macro pack_order inorder
        pack_channels \inorder, 2
        pack_channels \inorder, 6
        pack_channels \inorder, 8
.endm

        // generate the out-of-order (0) and in-order (1) routine families
        pack_order 0
        pack_order 1