1/* 2 * Copyright (c) 2012 3 * MIPS Technologies, Inc., California. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its 14 * contributors may be used to endorse or promote products derived from 15 * this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * Authors: Djordje Pesut (djordje@mips.com) 30 * Mirjana Vulin (mvulin@mips.com) 31 * 32 * This file is part of FFmpeg. 33 * 34 * FFmpeg is free software; you can redistribute it and/or 35 * modify it under the terms of the GNU Lesser General Public 36 * License as published by the Free Software Foundation; either 37 * version 2.1 of the License, or (at your option) any later version. 38 * 39 * FFmpeg is distributed in the hope that it will be useful, 40 * but WITHOUT ANY WARRANTY; without even the implied warranty of 41 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 42 * Lesser General Public License for more details. 43 * 44 * You should have received a copy of the GNU Lesser General Public 45 * License along with FFmpeg; if not, write to the Free Software 46 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 47 */ 48 49/** 50 * @file 51 * Reference: libavcodec/aacsbr.c 52 */ 53 54#include "libavcodec/aac.h" 55#include "libavcodec/aacsbr.h" 56 57#define ENVELOPE_ADJUSTMENT_OFFSET 2 58 59#if HAVE_INLINE_ASM 60static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr, 61 float X_low[32][40][2], const float W[2][32][32][2], 62 int buf_idx) 63{ 64 int i, k; 65 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; 66 float *p_x_low = &X_low[0][8][0]; 67 float *p_w = (float*)&W[buf_idx][0][0][0]; 68 float *p_x1_low = &X_low[0][0][0]; 69 float *p_w1 = (float*)&W[1-buf_idx][24][0][0]; 70 71 float *loop_end=p_x1_low + 2560; 72 73 /* loop unrolled 8 times */ 74 __asm__ volatile ( 75 "1: \n\t" 76 "sw $0, 0(%[p_x1_low]) \n\t" 77 "sw $0, 4(%[p_x1_low]) \n\t" 78 "sw $0, 8(%[p_x1_low]) \n\t" 79 "sw $0, 12(%[p_x1_low]) \n\t" 80 "sw $0, 16(%[p_x1_low]) \n\t" 81 "sw $0, 20(%[p_x1_low]) \n\t" 82 "sw $0, 24(%[p_x1_low]) \n\t" 83 "sw $0, 28(%[p_x1_low]) \n\t" 84 "addiu %[p_x1_low], %[p_x1_low], 32 \n\t" 85 "bne %[p_x1_low], %[loop_end], 1b \n\t" 86 "addiu %[p_x1_low], %[p_x1_low], -10240 \n\t" 87 88 : [p_x1_low]"+r"(p_x1_low) 89 : [loop_end]"r"(loop_end) 90 : "memory" 91 ); 92 93 for (k = 0; k < sbr->kx[1]; k++) { 94 for (i = 0; i < 32; i+=4) { 95 /* loop unrolled 4 times */ 96 __asm__ volatile ( 97 "lw %[temp0], 0(%[p_w]) \n\t" 98 "lw %[temp1], 4(%[p_w]) \n\t" 99 "lw %[temp2], 256(%[p_w]) \n\t" 100 "lw %[temp3], 260(%[p_w]) \n\t" 101 "lw %[temp4], 512(%[p_w]) \n\t" 102 "lw %[temp5], 516(%[p_w]) \n\t" 103 "lw %[temp6], 768(%[p_w]) \n\t" 104 "lw %[temp7], 772(%[p_w]) \n\t" 105 "sw %[temp0], 0(%[p_x_low]) \n\t" 106 "sw %[temp1], 4(%[p_x_low]) \n\t" 107 "sw %[temp2], 8(%[p_x_low]) \n\t" 108 "sw %[temp3], 12(%[p_x_low]) \n\t" 109 "sw %[temp4], 16(%[p_x_low]) \n\t" 110 "sw %[temp5], 20(%[p_x_low]) \n\t" 111 "sw %[temp6], 24(%[p_x_low]) \n\t" 112 "sw %[temp7], 28(%[p_x_low]) \n\t" 113 "addiu %[p_x_low], %[p_x_low], 32 \n\t" 114 "addiu %[p_w], %[p_w], 1024 \n\t" 115 116 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 117 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 118 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 119 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), 120 [p_w]"+r"(p_w), [p_x_low]"+r"(p_x_low) 121 : 122 : "memory" 123 ); 124 } 125 p_x_low += 16; 126 p_w -= 2046; 127 } 128 129 for (k = 0; k < sbr->kx[0]; k++) { 130 for (i = 0; i < 2; i++) { 131 132 /* loop unrolled 4 times */ 133 __asm__ volatile ( 134 "lw %[temp0], 0(%[p_w1]) \n\t" 135 "lw %[temp1], 4(%[p_w1]) \n\t" 136 "lw %[temp2], 256(%[p_w1]) \n\t" 137 "lw %[temp3], 260(%[p_w1]) \n\t" 138 "lw %[temp4], 512(%[p_w1]) \n\t" 139 "lw %[temp5], 516(%[p_w1]) \n\t" 140 "lw %[temp6], 768(%[p_w1]) \n\t" 141 "lw %[temp7], 772(%[p_w1]) \n\t" 142 "sw %[temp0], 0(%[p_x1_low]) \n\t" 143 "sw %[temp1], 4(%[p_x1_low]) \n\t" 144 "sw %[temp2], 8(%[p_x1_low]) \n\t" 145 "sw %[temp3], 12(%[p_x1_low]) \n\t" 146 "sw %[temp4], 16(%[p_x1_low]) \n\t" 147 "sw %[temp5], 20(%[p_x1_low]) \n\t" 148 "sw %[temp6], 24(%[p_x1_low]) \n\t" 149 "sw %[temp7], 28(%[p_x1_low]) \n\t" 150 "addiu %[p_x1_low], %[p_x1_low], 32 \n\t" 151 "addiu %[p_w1], %[p_w1], 1024 \n\t" 152 153 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 154 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 155 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 156 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), 157 [p_w1]"+r"(p_w1), [p_x1_low]"+r"(p_x1_low) 158 : 159 : "memory" 160 ); 161 } 162 p_x1_low += 64; 163 p_w1 -= 510; 164 } 165 return 0; 166} 167 168static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64], 169 const float Y0[38][64][2], const float Y1[38][64][2], 170 const float X_low[32][40][2], int ch) 171{ 172 int k, i; 173 const int i_f = 32; 174 int temp0, temp1, temp2, temp3; 175 const float *X_low1, *Y01, *Y11; 176 float *x1=&X[0][0][0]; 177 float *j=x1+4864; 178 const int i_Temp = FFMAX(2*sbr->data[ch].t_env_num_env_old - i_f, 0); 179 180 /* loop unrolled 8 times */ 181 __asm__ volatile ( 182 "1: \n\t" 183 "sw $0, 0(%[x1]) \n\t" 184 "sw $0, 4(%[x1]) \n\t" 185 "sw $0, 8(%[x1]) \n\t" 186 "sw $0, 12(%[x1]) \n\t" 187 "sw $0, 16(%[x1]) \n\t" 188 "sw $0, 20(%[x1]) \n\t" 189 "sw $0, 24(%[x1]) \n\t" 190 "sw $0, 28(%[x1]) \n\t" 191 "addiu %[x1], %[x1], 32 \n\t" 192 "bne %[x1], %[j], 1b \n\t" 193 "addiu %[x1], %[x1], -19456 \n\t" 194 195 : [x1]"+r"(x1) 196 : [j]"r"(j) 197 : "memory" 198 ); 199 200 if (i_Temp != 0) { 201 202 X_low1=&X_low[0][2][0]; 203 204 for (k = 0; k < sbr->kx[0]; k++) { 205 206 __asm__ volatile ( 207 "move %[i], $zero \n\t" 208 "2: \n\t" 209 "lw %[temp0], 0(%[X_low1]) \n\t" 210 "lw %[temp1], 4(%[X_low1]) \n\t" 211 "sw %[temp0], 0(%[x1]) \n\t" 212 "sw %[temp1], 9728(%[x1]) \n\t" 213 "addiu %[x1], %[x1], 256 \n\t" 214 "addiu %[X_low1], %[X_low1], 8 \n\t" 215 "addiu %[i], %[i], 1 \n\t" 216 "bne %[i], %[i_Temp], 2b \n\t" 217 218 : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i), 219 [temp0]"=&r"(temp0), [temp1]"=&r"(temp1) 220 : [i_Temp]"r"(i_Temp) 221 : "memory" 222 ); 223 x1-=(i_Temp<<6)-1; 224 X_low1-=(i_Temp<<1)-80; 225 } 226 227 x1=&X[0][0][k]; 228 Y01=(float*)&Y0[32][k][0]; 229 230 for (; k < sbr->kx[0] + sbr->m[0]; k++) { 231 __asm__ volatile ( 232 "move %[i], $zero \n\t" 233 "3: \n\t" 234 "lw %[temp0], 0(%[Y01]) \n\t" 235 "lw %[temp1], 4(%[Y01]) \n\t" 236 "sw %[temp0], 0(%[x1]) \n\t" 237 "sw %[temp1], 9728(%[x1]) \n\t" 238 "addiu %[x1], %[x1], 256 \n\t" 239 "addiu %[Y01], %[Y01], 512 \n\t" 240 "addiu %[i], %[i], 1 \n\t" 241 "bne %[i], %[i_Temp], 3b \n\t" 242 243 : [x1]"+r"(x1), [Y01]"+r"(Y01), [i]"=&r"(i), 244 [temp0]"=&r"(temp0), [temp1]"=&r"(temp1) 245 : [i_Temp]"r"(i_Temp) 246 : "memory" 247 ); 248 x1 -=(i_Temp<<6)-1; 249 Y01 -=(i_Temp<<7)-2; 250 } 251 } 252 253 x1=&X[0][i_Temp][0]; 254 X_low1=&X_low[0][i_Temp+2][0]; 255 temp3=38; 256 257 for (k = 0; k < sbr->kx[1]; k++) { 258 259 __asm__ volatile ( 260 "move %[i], %[i_Temp] \n\t" 261 "4: \n\t" 262 "lw %[temp0], 0(%[X_low1]) \n\t" 263 "lw %[temp1], 4(%[X_low1]) \n\t" 264 "sw %[temp0], 0(%[x1]) \n\t" 265 "sw %[temp1], 9728(%[x1]) \n\t" 266 "addiu %[x1], %[x1], 256 \n\t" 267 "addiu %[X_low1], %[X_low1], 8 \n\t" 268 "addiu %[i], %[i], 1 \n\t" 269 "bne %[i], %[temp3], 4b \n\t" 270 271 : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i), 272 [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 273 [temp2]"=&r"(temp2) 274 : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3) 275 : "memory" 276 ); 277 x1 -= ((38-i_Temp)<<6)-1; 278 X_low1 -= ((38-i_Temp)<<1)- 80; 279 } 280 281 x1=&X[0][i_Temp][k]; 282 Y11=&Y1[i_Temp][k][0]; 283 temp2=32; 284 285 for (; k < sbr->kx[1] + sbr->m[1]; k++) { 286 287 __asm__ volatile ( 288 "move %[i], %[i_Temp] \n\t" 289 "5: \n\t" 290 "lw %[temp0], 0(%[Y11]) \n\t" 291 "lw %[temp1], 4(%[Y11]) \n\t" 292 "sw %[temp0], 0(%[x1]) \n\t" 293 "sw %[temp1], 9728(%[x1]) \n\t" 294 "addiu %[x1], %[x1], 256 \n\t" 295 "addiu %[Y11], %[Y11], 512 \n\t" 296 "addiu %[i], %[i], 1 \n\t" 297 "bne %[i], %[temp2], 5b \n\t" 298 299 : [x1]"+r"(x1), [Y11]"+r"(Y11), [i]"=&r"(i), 300 [temp0]"=&r"(temp0), [temp1]"=&r"(temp1) 301 : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3), 302 [temp2]"r"(temp2) 303 : "memory" 304 ); 305 306 x1 -= ((32-i_Temp)<<6)-1; 307 Y11 -= ((32-i_Temp)<<7)-2; 308 } 309 return 0; 310} 311 312#if HAVE_MIPSFPU 313static void sbr_hf_assemble_mips(float Y1[38][64][2], 314 const float X_high[64][40][2], 315 SpectralBandReplication *sbr, SBRData *ch_data, 316 const int e_a[2]) 317{ 318 int e, i, j, m; 319 const int h_SL = 4 * !sbr->bs_smoothing_mode; 320 const int kx = sbr->kx[1]; 321 const int m_max = sbr->m[1]; 322 static const float h_smooth[5] = { 323 0.33333333333333, 324 0.30150283239582, 325 0.21816949906249, 326 0.11516383427084, 327 0.03183050093751, 328 }; 329 330 float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp; 331 int indexnoise = ch_data->f_indexnoise; 332 int indexsine = ch_data->f_indexsine; 333 float *g_temp1, *q_temp1, *pok, *pok1; 334 float temp1, temp2, temp3, temp4; 335 int size = m_max; 336 337 if (sbr->reset) { 338 for (i = 0; i < h_SL; i++) { 339 memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0])); 340 memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0], m_max * sizeof(sbr->q_m[0][0])); 341 } 342 } else if (h_SL) { 343 memcpy(g_temp[2*ch_data->t_env[0]], g_temp[2*ch_data->t_env_num_env_old], 4*sizeof(g_temp[0])); 344 memcpy(q_temp[2*ch_data->t_env[0]], q_temp[2*ch_data->t_env_num_env_old], 4*sizeof(q_temp[0])); 345 } 346 347 for (e = 0; e < ch_data->bs_num_env; e++) { 348 for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) { 349 g_temp1 = g_temp[h_SL + i]; 350 pok = sbr->gain[e]; 351 q_temp1 = q_temp[h_SL + i]; 352 pok1 = sbr->q_m[e]; 353 354 /* loop unrolled 4 times */ 355 for (j=0; j<(size>>2); j++) { 356 __asm__ volatile ( 357 "lw %[temp1], 0(%[pok]) \n\t" 358 "lw %[temp2], 4(%[pok]) \n\t" 359 "lw %[temp3], 8(%[pok]) \n\t" 360 "lw %[temp4], 12(%[pok]) \n\t" 361 "sw %[temp1], 0(%[g_temp1]) \n\t" 362 "sw %[temp2], 4(%[g_temp1]) \n\t" 363 "sw %[temp3], 8(%[g_temp1]) \n\t" 364 "sw %[temp4], 12(%[g_temp1]) \n\t" 365 "lw %[temp1], 0(%[pok1]) \n\t" 366 "lw %[temp2], 4(%[pok1]) \n\t" 367 "lw %[temp3], 8(%[pok1]) \n\t" 368 "lw %[temp4], 12(%[pok1]) \n\t" 369 "sw %[temp1], 0(%[q_temp1]) \n\t" 370 "sw %[temp2], 4(%[q_temp1]) \n\t" 371 "sw %[temp3], 8(%[q_temp1]) \n\t" 372 "sw %[temp4], 12(%[q_temp1]) \n\t" 373 "addiu %[pok], %[pok], 16 \n\t" 374 "addiu %[g_temp1], %[g_temp1], 16 \n\t" 375 "addiu %[pok1], %[pok1], 16 \n\t" 376 "addiu %[q_temp1], %[q_temp1], 16 \n\t" 377 378 : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), 379 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), 380 [pok]"+r"(pok), [g_temp1]"+r"(g_temp1), 381 [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1) 382 : 383 : "memory" 384 ); 385 } 386 387 for (j=0; j<(size&3); j++) { 388 __asm__ volatile ( 389 "lw %[temp1], 0(%[pok]) \n\t" 390 "lw %[temp2], 0(%[pok1]) \n\t" 391 "sw %[temp1], 0(%[g_temp1]) \n\t" 392 "sw %[temp2], 0(%[q_temp1]) \n\t" 393 "addiu %[pok], %[pok], 4 \n\t" 394 "addiu %[g_temp1], %[g_temp1], 4 \n\t" 395 "addiu %[pok1], %[pok1], 4 \n\t" 396 "addiu %[q_temp1], %[q_temp1], 4 \n\t" 397 398 : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), 399 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), 400 [pok]"+r"(pok), [g_temp1]"+r"(g_temp1), 401 [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1) 402 : 403 : "memory" 404 ); 405 } 406 } 407 } 408 409 for (e = 0; e < ch_data->bs_num_env; e++) { 410 for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) { 411 LOCAL_ALIGNED_16(float, g_filt_tab, [48]); 412 LOCAL_ALIGNED_16(float, q_filt_tab, [48]); 413 float *g_filt, *q_filt; 414 415 if (h_SL && e != e_a[0] && e != e_a[1]) { 416 g_filt = g_filt_tab; 417 q_filt = q_filt_tab; 418 419 for (m = 0; m < m_max; m++) { 420 const int idx1 = i + h_SL; 421 g_filt[m] = 0.0f; 422 q_filt[m] = 0.0f; 423 424 for (j = 0; j <= h_SL; j++) { 425 g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j]; 426 q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j]; 427 } 428 } 429 } else { 430 g_filt = g_temp[i + h_SL]; 431 q_filt = q_temp[i]; 432 } 433 434 sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max, 435 i + ENVELOPE_ADJUSTMENT_OFFSET); 436 437 if (e != e_a[0] && e != e_a[1]) { 438 sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e], 439 q_filt, indexnoise, 440 kx, m_max); 441 } else { 442 int idx = indexsine&1; 443 int A = (1-((indexsine+(kx & 1))&2)); 444 int B = (A^(-idx)) + idx; 445 float *out = &Y1[i][kx][idx]; 446 float *in = sbr->s_m[e]; 447 float temp0, temp1, temp2, temp3, temp4, temp5; 448 float A_f = (float)A; 449 float B_f = (float)B; 450 451 for (m = 0; m+1 < m_max; m+=2) { 452 453 temp2 = out[0]; 454 temp3 = out[2]; 455 456 __asm__ volatile( 457 "lwc1 %[temp0], 0(%[in]) \n\t" 458 "lwc1 %[temp1], 4(%[in]) \n\t" 459 "madd.s %[temp4], %[temp2], %[temp0], %[A_f] \n\t" 460 "madd.s %[temp5], %[temp3], %[temp1], %[B_f] \n\t" 461 "swc1 %[temp4], 0(%[out]) \n\t" 462 "swc1 %[temp5], 8(%[out]) \n\t" 463 "addiu %[in], %[in], 8 \n\t" 464 "addiu %[out], %[out], 16 \n\t" 465 466 : [temp0]"=&f" (temp0), [temp1]"=&f"(temp1), 467 [temp4]"=&f" (temp4), [temp5]"=&f"(temp5), 468 [in]"+r"(in), [out]"+r"(out) 469 : [A_f]"f"(A_f), [B_f]"f"(B_f), [temp2]"f"(temp2), 470 [temp3]"f"(temp3) 471 : "memory" 472 ); 473 } 474 if(m_max&1) 475 out[2*m ] += in[m ] * A; 476 } 477 indexnoise = (indexnoise + m_max) & 0x1ff; 478 indexsine = (indexsine + 1) & 3; 479 } 480 } 481 ch_data->f_indexnoise = indexnoise; 482 ch_data->f_indexsine = indexsine; 483} 484 485static void sbr_hf_inverse_filter_mips(SBRDSPContext *dsp, 486 float (*alpha0)[2], float (*alpha1)[2], 487 const float X_low[32][40][2], int k0) 488{ 489 int k; 490 float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, c; 491 float *phi1, *alpha_1, *alpha_0, res1, res2, temp_real, temp_im; 492 493 c = 1.000001f; 494 495 for (k = 0; k < k0; k++) { 496 LOCAL_ALIGNED_16(float, phi, [3], [2][2]); 497 float dk; 498 phi1 = &phi[0][0][0]; 499 alpha_1 = &alpha1[k][0]; 500 alpha_0 = &alpha0[k][0]; 501 dsp->autocorrelate(X_low[k], phi); 502 503 __asm__ volatile ( 504 "lwc1 %[temp0], 40(%[phi1]) \n\t" 505 "lwc1 %[temp1], 16(%[phi1]) \n\t" 506 "lwc1 %[temp2], 24(%[phi1]) \n\t" 507 "lwc1 %[temp3], 28(%[phi1]) \n\t" 508 "mul.s %[dk], %[temp0], %[temp1] \n\t" 509 "lwc1 %[temp4], 0(%[phi1]) \n\t" 510 "mul.s %[res2], %[temp2], %[temp2] \n\t" 511 "lwc1 %[temp5], 4(%[phi1]) \n\t" 512 "madd.s %[res2], %[res2], %[temp3], %[temp3] \n\t" 513 "lwc1 %[temp6], 8(%[phi1]) \n\t" 514 "div.s %[res2], %[res2], %[c] \n\t" 515 "lwc1 %[temp0], 12(%[phi1]) \n\t" 516 "sub.s %[dk], %[dk], %[res2] \n\t" 517 518 : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), 519 [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5), 520 [temp6]"=&f"(temp6), [res2]"=&f"(res2), [dk]"=&f"(dk) 521 : [phi1]"r"(phi1), [c]"f"(c) 522 : "memory" 523 ); 524 525 if (!dk) { 526 alpha_1[0] = 0; 527 alpha_1[1] = 0; 528 } else { 529 __asm__ volatile ( 530 "mul.s %[temp_real], %[temp4], %[temp2] \n\t" 531 "nmsub.s %[temp_real], %[temp_real], %[temp5], %[temp3] \n\t" 532 "nmsub.s %[temp_real], %[temp_real], %[temp6], %[temp1] \n\t" 533 "mul.s %[temp_im], %[temp4], %[temp3] \n\t" 534 "madd.s %[temp_im], %[temp_im], %[temp5], %[temp2] \n\t" 535 "nmsub.s %[temp_im], %[temp_im], %[temp0], %[temp1] \n\t" 536 "div.s %[temp_real], %[temp_real], %[dk] \n\t" 537 "div.s %[temp_im], %[temp_im], %[dk] \n\t" 538 "swc1 %[temp_real], 0(%[alpha_1]) \n\t" 539 "swc1 %[temp_im], 4(%[alpha_1]) \n\t" 540 541 : [temp_real]"=&f" (temp_real), [temp_im]"=&f"(temp_im) 542 : [phi1]"r"(phi1), [temp0]"f"(temp0), [temp1]"f"(temp1), 543 [temp2]"f"(temp2), [temp3]"f"(temp3), [temp4]"f"(temp4), 544 [temp5]"f"(temp5), [temp6]"f"(temp6), 545 [alpha_1]"r"(alpha_1), [dk]"f"(dk) 546 : "memory" 547 ); 548 } 549 550 if (!phi1[4]) { 551 alpha_0[0] = 0; 552 alpha_0[1] = 0; 553 } else { 554 __asm__ volatile ( 555 "lwc1 %[temp6], 0(%[alpha_1]) \n\t" 556 "lwc1 %[temp7], 4(%[alpha_1]) \n\t" 557 "mul.s %[temp_real], %[temp6], %[temp2] \n\t" 558 "add.s %[temp_real], %[temp_real], %[temp4] \n\t" 559 "madd.s %[temp_real], %[temp_real], %[temp7], %[temp3] \n\t" 560 "mul.s %[temp_im], %[temp7], %[temp2] \n\t" 561 "add.s %[temp_im], %[temp_im], %[temp5] \n\t" 562 "nmsub.s %[temp_im], %[temp_im], %[temp6], %[temp3] \n\t" 563 "div.s %[temp_real], %[temp_real], %[temp1] \n\t" 564 "div.s %[temp_im], %[temp_im], %[temp1] \n\t" 565 "neg.s %[temp_real], %[temp_real] \n\t" 566 "neg.s %[temp_im], %[temp_im] \n\t" 567 "swc1 %[temp_real], 0(%[alpha_0]) \n\t" 568 "swc1 %[temp_im], 4(%[alpha_0]) \n\t" 569 570 : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im), 571 [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), 572 [res1]"=&f"(res1), [res2]"=&f"(res2) 573 : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0), 574 [temp0]"f"(temp0), [temp1]"f"(temp1), [temp2]"f"(temp2), 575 [temp3]"f"(temp3), [temp4]"f"(temp4), [temp5]"f"(temp5) 576 : "memory" 577 ); 578 } 579 580 __asm__ volatile ( 581 "lwc1 %[temp1], 0(%[alpha_1]) \n\t" 582 "lwc1 %[temp2], 4(%[alpha_1]) \n\t" 583 "lwc1 %[temp_real], 0(%[alpha_0]) \n\t" 584 "lwc1 %[temp_im], 4(%[alpha_0]) \n\t" 585 "mul.s %[res1], %[temp1], %[temp1] \n\t" 586 "madd.s %[res1], %[res1], %[temp2], %[temp2] \n\t" 587 "mul.s %[res2], %[temp_real], %[temp_real] \n\t" 588 "madd.s %[res2], %[res2], %[temp_im], %[temp_im] \n\t" 589 590 : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im), 591 [temp1]"=&f"(temp1), [temp2]"=&f"(temp2), 592 [res1]"=&f"(res1), [res2]"=&f"(res2) 593 : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0) 594 : "memory" 595 ); 596 597 if (res1 >= 16.0f || res2 >= 16.0f) { 598 alpha_1[0] = 0; 599 alpha_1[1] = 0; 600 alpha_0[0] = 0; 601 alpha_0[1] = 0; 602 } 603 } 604} 605#endif /* HAVE_MIPSFPU */ 606#endif /* HAVE_INLINE_ASM */ 607 608void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c) 609{ 610#if HAVE_INLINE_ASM 611 c->sbr_lf_gen = sbr_lf_gen_mips; 612 c->sbr_x_gen = sbr_x_gen_mips; 613#if HAVE_MIPSFPU 614 c->sbr_hf_inverse_filter = sbr_hf_inverse_filter_mips; 615 c->sbr_hf_assemble = sbr_hf_assemble_mips; 616#endif /* HAVE_MIPSFPU */ 617#endif /* HAVE_INLINE_ASM */ 618} 619