/*
 * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

@ Fixed-point MDCT, ARM NEON.
@ All products are narrowed with vshrn #15, i.e. Q15 fixed-point rounding
@ of 16x16->32-bit multiplies.  Comment char is '@' (32-bit ARM GAS).

#include "asm.S"

        preserve8

@ prerot: MDCT pre-rotation with bit-reversed scatter of the results.
@ Expects (per the register comments below, as set up by the callers):
@   r2 = input buffer ("in"), r4 = revtab, r6 = mdct_size (n), r7 = tcos
@ Arguments:
@   \dst = base of the FFT work buffer the rotated samples are scattered
@          into (4 bytes per complex sample, indexed via revtab entries)
@   \rt  = scratch register; set to revtab + n4 and walked downwards
@ Processes the input from both ends toward the middle (post-incremented
@ and r12 = -16 post-decremented pointers), 8 complex values per pass.
@ Clobbers: r2, r3, r8-r12, lr, \rt, q0-q3, q8-q13, flags.
.macro prerot dst, rt
        lsr     r3,  r6,  #2            @ n4
        add     \rt, r4,  r6,  lsr #1   @ revtab + n4
        add     r9,  r3,  r3,  lsl #1   @ n3
        add     r8,  r7,  r6            @ tcos + n4
        add     r3,  r2,  r6,  lsr #1   @ in + n4
        add     r9,  r2,  r9,  lsl #1   @ in + n3
        sub     r8,  r8,  #16           @ step the "mirror" pointers back one
        sub     r10, r3,  #16           @ 16-byte vector so the r12 = -16
        sub     r11, r9,  #16           @ post-decrements walk them downwards
        mov     r12, #-16
1:
        vld2.16 {d0,d1},   [r9, :128]!
        vld2.16 {d2,d3},   [r11,:128], r12
        vld2.16 {d4,d5},   [r3, :128]!
        vld2.16 {d6,d7},   [r10,:128], r12
        vld2.16 {d16,d17}, [r7, :128]!  @ cos, sin
        vld2.16 {d18,d19}, [r8, :128], r12
        vrev64.16 q1,  q1               @ reverse the downward-walking halves
        vrev64.16 q3,  q3               @ so elements pair up with the
        vrev64.16 q9,  q9               @ upward-walking ones
        vneg.s16  d0,  d0
        vneg.s16  d2,  d2
        vneg.s16  d16, d16
        vneg.s16  d18, d18
        @ halving subtracts pre-scale by 1/2 while combining mirrored samples
        vhsub.s16 d0,  d0,  d3          @ re
        vhsub.s16 d4,  d7,  d4          @ im
        vhsub.s16 d6,  d6,  d5
        vhsub.s16 d2,  d2,  d1
        @ complex multiply by (cos, sin): Q15 products, narrowed below
        vmull.s16 q10, d0,  d16
        vmlsl.s16 q10, d4,  d17
        vmull.s16 q11, d0,  d17
        vmlal.s16 q11, d4,  d16
        vmull.s16 q12, d6,  d18
        vmlsl.s16 q12, d2,  d19
        vmull.s16 q13, d6,  d19
        vmlal.s16 q13, d2,  d18
        vshrn.s32 d0,  q10, #15
        vshrn.s32 d1,  q11, #15
        vshrn.s32 d2,  q12, #15
        vshrn.s32 d3,  q13, #15
        vzip.16   d0,  d1               @ re-interleave into re/im pairs
        vzip.16   d2,  d3
        @ scatter four pairs of 32-bit complex values through revtab:
        @ r4 walks revtab upwards, \rt walks it downwards
        ldrh    lr,  [r4], #2
        ldrh    r2,  [\rt, #-2]!
        add     lr,  \dst, lr,  lsl #2
        add     r2,  \dst, r2,  lsl #2
        vst1.32 {d0[0]}, [lr,:32]
        vst1.32 {d2[0]}, [r2,:32]
        ldrh    lr,  [r4], #2
        ldrh    r2,  [\rt, #-2]!
        add     lr,  \dst, lr,  lsl #2
        add     r2,  \dst, r2,  lsl #2
        vst1.32 {d0[1]}, [lr,:32]
        vst1.32 {d2[1]}, [r2,:32]
        ldrh    lr,  [r4], #2
        ldrh    r2,  [\rt, #-2]!
        add     lr,  \dst, lr,  lsl #2
        add     r2,  \dst, r2,  lsl #2
        vst1.32 {d1[0]}, [lr,:32]
        vst1.32 {d3[0]}, [r2,:32]
        ldrh    lr,  [r4], #2
        ldrh    r2,  [\rt, #-2]!
        add     lr,  \dst, lr,  lsl #2
        add     r2,  \dst, r2,  lsl #2
        vst1.32 {d1[1]}, [lr,:32]
        vst1.32 {d3[1]}, [r2,:32]
        subs    r6,  r6,  #32           @ 32 bytes = 16 int16 consumed/pass
        bgt     1b
.endm

@ void ff_mdct_fixed_calc_neon(FFTContext *s, FFTSample *out, const FFTSample *in)
@ In:  r0 = context (offsets used: #8 revtab, #16 mdct_size, #24 tcos
@                    — other layout details not visible here)
@ r1 (out) doubles as the FFT buffer: pre-rotated input is scattered into
@ it, ff_fft_fixed_calc_neon transforms it in place, then the post-rotation
@ below rewrites it in place (16-bit interleaved re/im, via vst2.16).
@ r1 is pushed with the callee-saved regs and popped into r5 after the FFT.
function ff_mdct_fixed_calc_neon, export=1
        push    {r1,r4-r11,lr}

        ldr     r4,  [r0, #8]           @ revtab
        ldr     r6,  [r0, #16]          @ mdct_size; n
        ldr     r7,  [r0, #24]          @ tcos

        prerot  r1,  r5

        mov     r4,  r0                 @ keep ctx across the call (r0 is volatile)
        bl      X(ff_fft_fixed_calc_neon)

        @ post-rotation, in place over the FFT output, from both ends inwards
        pop     {r5}                    @ saved r1: out / FFT buffer
        mov     r12, #-16
        ldr     r6,  [r4, #16]          @ mdct_size; n
        ldr     r7,  [r4, #24]          @ tcos
        add     r5,  r5,  r6,  lsr #1   @ out  + n4 complex values
        add     r7,  r7,  r6,  lsr #1   @ tcos + n4
        sub     r1,  r5,  #16           @ downward-walking mirrors
        sub     r2,  r7,  #16
1:
        vld2.16 {d4,d5}, [r7,:128]!     @ cos, sin (upper half)
        vld2.16 {d6,d7}, [r2,:128], r12 @ cos, sin (lower half, reversed below)
        vld2.16 {d0,d1}, [r5,:128]      @ no writeback: stored back below
        vld2.16 {d2,d3}, [r1,:128]
        vrev64.16 q3,  q3
        vrev64.16 q1,  q1
        vneg.s16  q3,  q3
        vneg.s16  q2,  q2
        @ two complex multiplies (Q15), results cross-written to the
        @ opposite ends of the buffer
        vmull.s16 q11, d2,  d6
        vmlal.s16 q11, d3,  d7
        vmull.s16 q8,  d0,  d5
        vmlsl.s16 q8,  d1,  d4
        vmull.s16 q9,  d0,  d4
        vmlal.s16 q9,  d1,  d5
        vmull.s16 q10, d2,  d7
        vmlsl.s16 q10, d3,  d6
        vshrn.s32 d0,  q11, #15
        vshrn.s32 d1,  q8,  #15
        vshrn.s32 d2,  q9,  #15
        vshrn.s32 d3,  q10, #15
        vrev64.16 q0,  q0               @ restore order for the mirrored store
        vst2.16 {d2,d3}, [r5,:128]!
        vst2.16 {d0,d1}, [r1,:128], r12
        subs    r6,  r6,  #32
        bgt     1b

        pop     {r4-r11,pc}
endfunc

@ Like ff_mdct_fixed_calc_neon but with widened output:
@   r0 = context (offsets: #8 revtab, #12 tmp_buf via ldrd, #16 mdct_size,
@                 #24 tcos), r1 = output, r2 = input.
@ The FFT runs in tmp_buf (r5); the post-rotation writes 32-bit products
@ (vst2.32, NOT narrowed to Q15) to the separate output buffer in r1.
function ff_mdct_fixed_calcw_neon, export=1
        push    {r1,r4-r11,lr}

        ldrd    r4,  r5,  [r0, #8]      @ revtab, tmp_buf
        ldr     r6,  [r0, #16]          @ mdct_size; n
        ldr     r7,  [r0, #24]          @ tcos

        prerot  r5,  r1                 @ scatter into tmp_buf

        mov     r4,  r0                 @ keep ctx across the call
        mov     r1,  r5                 @ FFT operates on tmp_buf
        bl      X(ff_fft_fixed_calc_neon)

        @ post-rotation: read tmp_buf from both ends, write 32-bit results
        @ to out (r7 upwards, r1 downwards)
        pop     {r7}                    @ saved r1: out
        mov     r12, #-16
        ldr     r6,  [r4, #16]          @ mdct_size; n
        ldr     r9,  [r4, #24]          @ tcos
        add     r5,  r5,  r6,  lsr #1   @ tmp_buf + n4 complex values
        add     r7,  r7,  r6            @ out + n4 (32-bit elements)
        add     r9,  r9,  r6,  lsr #1   @ tcos + n4
        sub     r3,  r5,  #16           @ downward-walking mirrors
        sub     r1,  r7,  #16
        sub     r2,  r9,  #16
1:
        vld2.16 {d4,d5}, [r9,:128]!     @ cos, sin
        vld2.16 {d6,d7}, [r2,:128], r12
        vld2.16 {d0,d1}, [r5,:128]!
        vld2.16 {d2,d3}, [r3,:128], r12
        vrev64.16 q3,  q3
        vrev64.16 q1,  q1
        vneg.s16  q3,  q3
        vneg.s16  q2,  q2
        @ complex multiplies; 32-bit products are stored directly
        vmull.s16 q8,  d2,  d6
        vmlal.s16 q8,  d3,  d7
        vmull.s16 q9,  d0,  d5
        vmlsl.s16 q9,  d1,  d4
        vmull.s16 q10, d0,  d4
        vmlal.s16 q10, d1,  d5
        vmull.s16 q11, d2,  d7
        vmlsl.s16 q11, d3,  d6
        vrev64.32 q8,  q8               @ reverse for the downward store
        vrev64.32 q9,  q9
        vst2.32 {q10,q11}, [r7,:128]!
        vst2.32 {d16,d18}, [r1,:128], r12
        vst2.32 {d17,d19}, [r1,:128], r12
        subs    r6,  r6,  #32
        bgt     1b

        pop     {r4-r11,pc}
endfunc