/*
 * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "asm.S"

        preserve8

/*
 * ff_synth_filter_float_neon
 *
 * Runs ff_imdct_half_neon on the input block, then accumulates windowed
 * products of the synthesis buffer into 32 output samples and 32 carry-over
 * values, using 32-bit float NEON arithmetic.
 *
 * Arguments as consumed by the code below (the C prototype is declared
 * outside this file; confirm the exact types there):
 *   r0           handed unchanged to ff_imdct_half_neon
 *   r1           base address of synth_buf
 *   r2           address of synth_buf_offset (read, then updated in place)
 *   r3           synth_buf2 (32 floats, read and rewritten)
 *   stack word 0 window
 *   stack word 1 out (32 floats written)
 *   stack word 2 in  (passed to ff_imdct_half_neon as its r2)
 *   scale        read from d0[0]; on NOVFP builds it is first loaded from
 *                stack word 3, on VFP builds d0 is saved around the call.
 *                NOTE(review): VFP/NOVFP are defined outside this file,
 *                presumably selecting hard-float vs. soft-float argument
 *                passing -- confirm in asm.S.
 *
 * Steps:
 *   1. off = *r2; synth = r1 + 4*off; *r2 = (off - 32) & 511.
 *   2. ff_imdct_half_neon(r0, synth, in).
 *   3. For j = 0, 4, 8, 12 (four lanes per pass) and k = 0..7:
 *        a -= window[j      + 64k ..] * reversed(synth[12 - j + 64k ..])
 *        b += window[j + 16 + 64k ..] *          synth[j      + 64k ..]
 *        c += window[j + 32 + 64k ..] *          synth[j + 16 + 64k ..]
 *        d += window[j + 48 + 64k ..] * reversed(synth[28 - j + 64k ..])
 *      with a, b preloaded from synth_buf2[j], synth_buf2[16 + j] and
 *      c, d starting at zero.  Afterwards
 *        out[j] = a * scale,  out[16 + j] = b * scale,
 *        synth_buf2[j] = c,   synth_buf2[16 + j] = d.
 *      The synth pointers are moved back by 512 floats once during the
 *      k loop (or after it when off < 64), i.e. synth_buf is addressed as
 *      a 512-element ring.
 *
 * All vector loads and stores carry a :128 alignment qualifier, so
 * synth (r1 + 4*off), synth_buf2, window and out must be 16-byte aligned.
 */
function ff_synth_filter_float_neon, export=1
        push            {r3-r11,lr}             @ 10 words; r3 is restored right after the call

        ldr             r4,  [r2]               @ synth_buf_offset
        add             r1,  r1,  r4,  lsl #2   @ synth_buf + offset (4-byte elements)
        sub             r12, r4,  #32
        bfc             r12, #9,  #23           @ keep bits 0..8: (offset - 32) & 511
        bic             r4,  r4,  #63           @ offset rounded down to a multiple of 64
        str             r12, [r2]               @ write back the new synth_buf_offset

        ldr             r2,  [sp, #12*4]        @ in: stack word 2, above the 10 saved words
        mov             r9,  r1                 @ synth_buf, needed again after the call

        @ d0[0] is the scale multiplier used at label 3; keep it across the call
VFP     vpush           {d0}
        bl              X(ff_imdct_half_neon)
VFP     vpop            {d0}
        pop             {r3}                    @ synth_buf2 back; 9 saved words remain

        ldr             r5,  [sp, #9*4]         @ window
        ldr             r2,  [sp, #10*4]        @ out
NOVFP   vldr            s0,  [sp, #12*4]        @ scale
        add             r8,  r9,  #12*4         @ read backwards from synth_buf[12..15]

        mov             lr,  #64*4              @ post-increment stride: 64 floats
        mov             r1,  #4                 @ outer passes, 4 lanes each
1:
        @ r10, r11, r0, r6, r7 are derived afresh from r9, r8, r5 on every pass
        add             r10, r9,  #16*4         @ synth_buf
        add             r11, r8,  #16*4
        add             r0,  r5,  #16*4         @ window
        add             r6,  r5,  #32*4
        add             r7,  r5,  #48*4

        vld1.32         {q10},    [r3,:128]     @ a
        add             r3,  r3,  #16*4
        vld1.32         {q1},     [r3,:128]     @ b
        vmov.f32        q2,  #0.0               @ c
        vmov.f32        q3,  #0.0               @ d

        mov             r12, #512               @ counts down by 64: 8 inner iterations
2:
        @ vrev64 swaps the two floats in each d half; pairing d16 with d19 and
        @ d17 with d18 swaps the halves, giving a full 4-element reversal.
        vld1.32         {q9},     [r8, :128], lr
        vrev64.32       q9,  q9
        vld1.32         {q8},     [r5, :128], lr
        vmls.f32        d20, d16, d19           @ a -= window * reversed synth
        vld1.32         {q11},    [r0, :128], lr
        vmls.f32        d21, d17, d18
        vld1.32         {q12},    [r9, :128], lr
        vmla.f32        d2,  d22, d24           @ b += window[+16] * synth
        vld1.32         {q8},     [r6, :128], lr
        vmla.f32        d3,  d23, d25
        vld1.32         {q9},     [r10,:128], lr
        vmla.f32        d4,  d16, d18           @ c += window[+32] * synth[+16]
        vld1.32         {q12},    [r11,:128], lr
        vmla.f32        d5,  d17, d19
        vrev64.32       q12, q12
        vld1.32         {q11},    [r7, :128], lr
        vmla.f32        d6,  d22, d25           @ d += window[+48] * reversed synth[+16]
        vmla.f32        d7,  d23, d24
        subs            r12, r12, #64
        beq             3f                      @ all 8 iterations done
        cmp             r12, r4
        bne             2b
        @ remaining count equals the aligned offset: step the four synth_buf
        @ pointers back by 512 floats (taken at most once per pass)
        sub             r8,  r8,  #512*4
        sub             r9,  r9,  #512*4
        sub             r10, r10, #512*4
        sub             r11, r11, #512*4
        b               2b
3:
        vmul.f32        q8,  q10, d0[0]         @ a * scale
        vmul.f32        q9,  q1,  d0[0]         @ b * scale
        vst1.32         {q3},     [r3,:128]     @ d -> synth_buf2[16 + j]
        sub             r3,  r3,  #16*4
        vst1.32         {q2},     [r3,:128]     @ c -> synth_buf2[j]
        vst1.32         {q8},     [r2,:128]     @ out[j]
        add             r2,  r2,  #16*4
        vst1.32         {q9},     [r2,:128]     @ out[16 + j]

        subs            r1,  r1,  #1
        it              eq
        popeq           {r4-r11,pc}             @ last pass: restore and return via saved lr

        @ With r4 == 0 the inner loop never stepped r8 and r9 back, so they sit
        @ 8 * 64 floats ahead; otherwise that step already cancelled the advance.
        cmp             r4,  #0
        itt             eq
        subeq           r8,  r8,  #512*4
        subeq           r9,  r9,  #512*4
        sub             r5,  r5,  #512*4        @ window always advanced 8 * 64 floats
        sub             r2,  r2,  #12*4         @ out: net +4 floats after the +16 above
        add             r3,  r3,  #4*4          @ synth_buf2
        add             r5,  r5,  #4*4          @ window
        add             r9,  r9,  #4*4          @ synth_buf
        sub             r8,  r8,  #4*4          @ synth_buf
        b               1b
endfunc