/*
 * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * NEON fixed-point (16-bit) forward MDCT for 32-bit ARM.
 *
 * Both entry points receive a context pointer in r0 and an output
 * buffer in r1.  The context fields read below are:
 *   +8  revtab      (bit-reversal permutation table of u16 indices)
 *   +12 tmp_buf     (calcw variant only)
 *   +16 mdct_size   (n)
 *   +24 tcos        (interleaved cos/sin twiddle table, s16)
 * NOTE(review): offsets are taken from the loads in this file —
 * confirm against the C context struct definition.
 */

/*
 * prerot: MDCT pre-rotation with bit-reversed output ordering.
 *   \dst -- base of the complex output buffer (4 bytes per element)
 *   \rt  -- scratch register, walks revtab downwards from revtab + n4
 * Expects on entry: r2 = input, r4 = revtab, r6 = n, r7 = tcos.
 * Two input streams run forwards (r9, r3) and two backwards
 * (r11, r10) so that mirrored sample pairs can be combined; the
 * backward loads are reversed with vrev64.16 to line the lanes up.
 * Clobbers: r2, r3, r8-r12, lr, q0-q3, q8-q13; advances r4/r7 and
 * counts r6 down to <= 0.
 */
.macro prerot dst, rt
        lsr             r3,  r6,  #2            @ n4
        add             \rt, r4,  r6,  lsr #1   @ revtab + n4
        add             r9,  r3,  r3,  lsl #1   @ n3
        add             r8,  r7,  r6            @ tcos + n4
        add             r3,  r2,  r6,  lsr #1   @ in + n4
        add             r9,  r2,  r9,  lsl #1   @ in + n3
        sub             r8,  r8,  #16           @ rewind the backward pointers by
        sub             r10, r3,  #16           @ one 16-byte vector so post-index
        sub             r11, r9,  #16           @ with r12 = -16 walks them down
        mov             r12, #-16
1:
        vld2.16         {d0,d1},   [r9, :128]!      @ forward stream at in + n3
        vld2.16         {d2,d3},   [r11,:128], r12  @ backward mirror of the above
        vld2.16         {d4,d5},   [r3, :128]!      @ forward stream at in + n4
        vld2.16         {d6,d7},   [r10,:128], r12  @ backward mirror of the above
        vld2.16         {d16,d17}, [r7, :128]!      @ cos, sin
        vld2.16         {d18,d19}, [r8, :128], r12  @ cos, sin from the table end
        vrev64.16       q1,  q1                 @ reverse backward-loaded lanes
        vrev64.16       q3,  q3
        vrev64.16       q9,  q9
        vneg.s16        d0,  d0
        vneg.s16        d2,  d2
        vneg.s16        d16, d16
        vneg.s16        d18, d18
        vhsub.s16       d0,  d0,  d3            @ re
        vhsub.s16       d4,  d7,  d4            @ im
        vhsub.s16       d6,  d6,  d5
        vhsub.s16       d2,  d2,  d1
        @ complex multiply (re,im) * (cos,sin) in 32-bit precision:
        vmull.s16       q10, d0,  d16
        vmlsl.s16       q10, d4,  d17
        vmull.s16       q11, d0,  d17
        vmlal.s16       q11, d4,  d16
        vmull.s16       q12, d6,  d18
        vmlsl.s16       q12, d2,  d19
        vmull.s16       q13, d6,  d19
        vmlal.s16       q13, d2,  d18
        vshrn.s32       d0,  q10, #15           @ narrow back to Q15 fixed point
        vshrn.s32       d1,  q11, #15
        vshrn.s32       d2,  q12, #15
        vshrn.s32       d3,  q13, #15
        vzip.16         d0,  d1                 @ re-interleave re/im pairs
        vzip.16         d2,  d3
        @ scatter the 8 complex results through revtab: r4 reads the
        @ table forwards, \rt reads it backwards; each u16 index is
        @ scaled by 4 (one complex s16 pair) to address \dst.
        ldrh            lr,  [r4], #2
        ldrh            r2,  [\rt, #-2]!
        add             lr,  \dst, lr,  lsl #2
        add             r2,  \dst, r2,  lsl #2
        vst1.32         {d0[0]},  [lr,:32]
        vst1.32         {d2[0]},  [r2,:32]
        ldrh            lr,  [r4], #2
        ldrh            r2,  [\rt, #-2]!
        add             lr,  \dst, lr,  lsl #2
        add             r2,  \dst, r2,  lsl #2
        vst1.32         {d0[1]},  [lr,:32]
        vst1.32         {d2[1]},  [r2,:32]
        ldrh            lr,  [r4], #2
        ldrh            r2,  [\rt, #-2]!
        add             lr,  \dst, lr,  lsl #2
        add             r2,  \dst, r2,  lsl #2
        vst1.32         {d1[0]},  [lr,:32]
        vst1.32         {d3[0]},  [r2,:32]
        ldrh            lr,  [r4], #2
        ldrh            r2,  [\rt, #-2]!
        add             lr,  \dst, lr,  lsl #2
        add             r2,  \dst, r2,  lsl #2
        vst1.32         {d1[1]},  [lr,:32]
        vst1.32         {d3[1]},  [r2,:32]
        subs            r6,  r6,  #32           @ 32 count units per iteration
        bgt             1b
.endm

/*
 * void ff_mdct_fixed_calc_neon(FFTContext *s /* r0 */, FFTSample *out /* r1 */,
 *                              const FFTSample *in /* r2 */)
 * Pre-rotates into out, runs the fixed-point FFT in place, then applies
 * the post-rotation (twiddle by tcos) in place on the FFT result.
 */
function ff_mdct_fixed_calc_neon, export=1
        push            {r1,r4-r11,lr}          @ r1 saved for after the FFT call

        ldr             r4,  [r0, #8]           @ revtab
        ldr             r6,  [r0, #16]          @ mdct_size; n
        ldr             r7,  [r0, #24]          @ tcos

        prerot          r1,  r5

        mov             r4,  r0                 @ keep context across the call
        bl              X(ff_fft_fixed_calc_neon)

        pop             {r5}                    @ saved r1: output buffer
        mov             r12, #-16
        ldr             r6,  [r4, #16]          @ mdct_size; n
        ldr             r7,  [r4, #24]          @ tcos
        add             r5,  r5,  r6,  lsr #1   @ out + n4 complex elements
        add             r7,  r7,  r6,  lsr #1   @ tcos midpoint
        sub             r1,  r5,  #16           @ backward data pointer
        sub             r2,  r7,  #16           @ backward twiddle pointer
1:
        @ post-rotation, in place: the upper half (r5 up) and lower half
        @ (r1 down) of the buffer are rotated against mirrored twiddles.
        @ Note r5/r1 are only advanced by the stores at the loop bottom.
        vld2.16         {d4,d5},  [r7,:128]!    @ cos, sin (forwards)
        vld2.16         {d6,d7},  [r2,:128], r12 @ cos, sin (backwards)
        vld2.16         {d0,d1},  [r5,:128]
        vld2.16         {d2,d3},  [r1,:128]
        vrev64.16       q3,  q3                 @ align backward-loaded lanes
        vrev64.16       q1,  q1
        vneg.s16        q3,  q3
        vneg.s16        q2,  q2
        vmull.s16       q11, d2,  d6            @ 32-bit complex rotations
        vmlal.s16       q11, d3,  d7
        vmull.s16       q8,  d0,  d5
        vmlsl.s16       q8,  d1,  d4
        vmull.s16       q9,  d0,  d4
        vmlal.s16       q9,  d1,  d5
        vmull.s16       q10, d2,  d7
        vmlsl.s16       q10, d3,  d6
        vshrn.s32       d0,  q11, #15           @ narrow back to Q15
        vshrn.s32       d1,  q8,  #15
        vshrn.s32       d2,  q9,  #15
        vshrn.s32       d3,  q10, #15
        vrev64.16       q0,  q0                 @ mirror before storing downwards
        vst2.16         {d2,d3},  [r5,:128]!
        vst2.16         {d0,d1},  [r1,:128], r12
        subs            r6,  r6,  #32
        bgt             1b

        pop             {r4-r11,pc}
endfunc

/*
 * ff_mdct_fixed_calcw_neon: as above, but pre-rotates and runs the FFT
 * in the context's tmp_buf and writes the post-rotated result to the
 * out buffer widened to 32 bits (vst2.32), instead of in place.
 */
function ff_mdct_fixed_calcw_neon, export=1
        push            {r1,r4-r11,lr}          @ r1 saved for after the FFT call

        ldrd            r4,  r5,  [r0, #8]      @ revtab, tmp_buf
        ldr             r6,  [r0, #16]          @ mdct_size; n
        ldr             r7,  [r0, #24]          @ tcos

        prerot          r5,  r1                 @ pre-rotate into tmp_buf

        mov             r4,  r0                 @ keep context across the call
        mov             r1,  r5                 @ FFT operates on tmp_buf
        bl              X(ff_fft_fixed_calc_neon)

        pop             {r7}                    @ saved r1: 32-bit output buffer
        mov             r12, #-16
        ldr             r6,  [r4, #16]          @ mdct_size; n
        ldr             r9,  [r4, #24]          @ tcos
        add             r5,  r5,  r6,  lsr #1   @ tmp_buf + n4 complex elements
        add             r7,  r7,  r6            @ out midpoint (32-bit elements)
        add             r9,  r9,  r6,  lsr #1   @ tcos midpoint
        sub             r3,  r5,  #16           @ backward data pointer
        sub             r1,  r7,  #16           @ backward output pointer
        sub             r2,  r9,  #16           @ backward twiddle pointer
1:
        vld2.16         {d4,d5},  [r9,:128]!    @ cos, sin (forwards)
        vld2.16         {d6,d7},  [r2,:128], r12 @ cos, sin (backwards)
        vld2.16         {d0,d1},  [r5,:128]!
        vld2.16         {d2,d3},  [r3,:128], r12
        vrev64.16       q3,  q3                 @ align backward-loaded lanes
        vrev64.16       q1,  q1
        vneg.s16        q3,  q3
        vneg.s16        q2,  q2
        vmull.s16       q8,  d2,  d6            @ 32-bit complex rotations;
        vmlal.s16       q8,  d3,  d7            @ results kept at full width
        vmull.s16       q9,  d0,  d5
        vmlsl.s16       q9,  d1,  d4
        vmull.s16       q10, d0,  d4
        vmlal.s16       q10, d1,  d5
        vmull.s16       q11, d2,  d7
        vmlsl.s16       q11, d3,  d6
        vrev64.32       q8,  q8                 @ mirror the downward results
        vrev64.32       q9,  q9
        vst2.32         {q10,q11},[r7,:128]!
        vst2.32         {d16,d18},[r1,:128], r12 @ two stores: d-regs of q8/q9
        vst2.32         {d17,d19},[r1,:128], r12 @ interleave across both pairs
        subs            r6,  r6,  #32
        bgt             1b

        pop             {r4-r11,pc}
endfunc