1/* 2 * Copyright (c) 2012 3 * MIPS Technologies, Inc., California. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its 14 * contributors may be used to endorse or promote products derived from 15 * this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * Authors: Darko Laus (darko@mips.com) 30 * Djordje Pesut (djordje@mips.com) 31 * Mirjana Vulin (mvulin@mips.com) 32 * 33 * This file is part of FFmpeg. 34 * 35 * FFmpeg is free software; you can redistribute it and/or 36 * modify it under the terms of the GNU Lesser General Public 37 * License as published by the Free Software Foundation; either 38 * version 2.1 of the License, or (at your option) any later version. 39 * 40 * FFmpeg is distributed in the hope that it will be useful, 41 * but WITHOUT ANY WARRANTY; without even the implied warranty of 42 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 43 * Lesser General Public License for more details. 44 * 45 * You should have received a copy of the GNU Lesser General Public 46 * License along with FFmpeg; if not, write to the Free Software 47 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 48 */ 49 50/** 51 * @file 52 * Reference: libavcodec/aacdec.c 53 */ 54 55#include "libavcodec/aac.h" 56#include "aacdec_mips.h" 57#include "libavcodec/aactab.h" 58#include "libavcodec/sinewin.h" 59 60#if HAVE_INLINE_ASM 61static av_always_inline int lcg_random(unsigned previous_val) 62{ 63 union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 }; 64 return v.s; 65} 66 67static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce) 68{ 69 IndividualChannelStream *ics = &sce->ics; 70 float *in = sce->coeffs; 71 float *out = sce->ret; 72 float *saved = sce->saved; 73 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; 74 const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; 75 const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; 76 float *buf = ac->buf_mdct; 77 int i; 78 79 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 80 for (i = 0; i < 1024; i += 128) 81 ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i); 82 } else 83 ac->mdct.imdct_half(&ac->mdct, buf, in); 84 85 /* window overlapping 86 * NOTE: To simplify the overlapping code, all 'meaningless' short to long 87 * and long to short transitions are considered to be short to short 88 * transitions. This leaves just two cases (long to long and short to short) 89 * with a little special sauce for EIGHT_SHORT_SEQUENCE. 90 */ 91 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && 92 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { 93 ac->fdsp.vector_fmul_window( out, saved, buf, lwindow_prev, 512); 94 } else { 95 { 96 float *buf1 = saved; 97 float *buf2 = out; 98 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; 99 int loop_end; 100 101 /* loop unrolled 8 times */ 102 __asm__ volatile ( 103 ".set push \n\t" 104 ".set noreorder \n\t" 105 "addiu %[loop_end], %[src], 1792 \n\t" 106 "1: \n\t" 107 "lw %[temp0], 0(%[src]) \n\t" 108 "lw %[temp1], 4(%[src]) \n\t" 109 "lw %[temp2], 8(%[src]) \n\t" 110 "lw %[temp3], 12(%[src]) \n\t" 111 "lw %[temp4], 16(%[src]) \n\t" 112 "lw %[temp5], 20(%[src]) \n\t" 113 "lw %[temp6], 24(%[src]) \n\t" 114 "lw %[temp7], 28(%[src]) \n\t" 115 "addiu %[src], %[src], 32 \n\t" 116 "sw %[temp0], 0(%[dst]) \n\t" 117 "sw %[temp1], 4(%[dst]) \n\t" 118 "sw %[temp2], 8(%[dst]) \n\t" 119 "sw %[temp3], 12(%[dst]) \n\t" 120 "sw %[temp4], 16(%[dst]) \n\t" 121 "sw %[temp5], 20(%[dst]) \n\t" 122 "sw %[temp6], 24(%[dst]) \n\t" 123 "sw %[temp7], 28(%[dst]) \n\t" 124 "bne %[src], %[loop_end], 1b \n\t" 125 " addiu %[dst], %[dst], 32 \n\t" 126 ".set pop \n\t" 127 128 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 129 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 130 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 131 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), 132 [loop_end]"=&r"(loop_end), [src]"+r"(buf1), 133 [dst]"+r"(buf2) 134 : 135 : "memory" 136 ); 137 } 138 139 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 140 { 141 float wi; 142 float wj; 143 int i; 144 float temp0, temp1, temp2, temp3; 145 float *dst0 = out + 448 + 0*128; 146 float *dst1 = dst0 + 64 + 63; 147 float *dst2 = saved + 63; 148 float *win0 = (float*)swindow; 149 float *win1 = win0 + 64 + 63; 150 float *win0_prev = (float*)swindow_prev; 151 float *win1_prev = win0_prev + 64 + 63; 152 float *src0_prev = saved + 448; 153 float *src1_prev = buf + 0*128 + 63; 154 float *src0 = buf + 0*128 + 64; 155 float *src1 = buf + 1*128 + 63; 156 157 for(i = 0; i < 64; i++) 158 { 159 temp0 = src0_prev[0]; 160 temp1 = src1_prev[0]; 161 wi = *win0_prev; 162 wj = *win1_prev; 163 temp2 = src0[0]; 164 temp3 = src1[0]; 165 dst0[0] = temp0 * wj - temp1 * wi; 166 dst1[0] = temp0 * wi + temp1 * wj; 167 168 wi = *win0; 169 wj = *win1; 170 171 temp0 = src0[128]; 172 temp1 = src1[128]; 173 dst0[128] = temp2 * wj - temp3 * wi; 174 dst1[128] = temp2 * wi + temp3 * wj; 175 176 temp2 = src0[256]; 177 temp3 = src1[256]; 178 dst0[256] = temp0 * wj - temp1 * wi; 179 dst1[256] = temp0 * wi + temp1 * wj; 180 dst0[384] = temp2 * wj - temp3 * wi; 181 dst1[384] = temp2 * wi + temp3 * wj; 182 183 temp0 = src0[384]; 184 temp1 = src1[384]; 185 dst0[512] = temp0 * wj - temp1 * wi; 186 dst2[0] = temp0 * wi + temp1 * wj; 187 188 src0++; 189 src1--; 190 src0_prev++; 191 src1_prev--; 192 win0++; 193 win1--; 194 win0_prev++; 195 win1_prev--; 196 dst0++; 197 dst1--; 198 dst2--; 199 } 200 } 201 } else { 202 ac->fdsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64); 203 { 204 float *buf1 = buf + 64; 205 float *buf2 = out + 576; 206 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; 207 int loop_end; 208 209 /* loop unrolled 8 times */ 210 __asm__ volatile ( 211 ".set push \n\t" 212 ".set noreorder \n\t" 213 "addiu %[loop_end], %[src], 1792 \n\t" 214 "1: \n\t" 215 "lw %[temp0], 0(%[src]) \n\t" 216 "lw %[temp1], 4(%[src]) \n\t" 217 "lw %[temp2], 8(%[src]) \n\t" 218 "lw %[temp3], 12(%[src]) \n\t" 219 "lw %[temp4], 16(%[src]) \n\t" 220 "lw %[temp5], 20(%[src]) \n\t" 221 "lw %[temp6], 24(%[src]) \n\t" 222 "lw %[temp7], 28(%[src]) \n\t" 223 "addiu %[src], %[src], 32 \n\t" 224 "sw %[temp0], 0(%[dst]) \n\t" 225 "sw %[temp1], 4(%[dst]) \n\t" 226 "sw %[temp2], 8(%[dst]) \n\t" 227 "sw %[temp3], 12(%[dst]) \n\t" 228 "sw %[temp4], 16(%[dst]) \n\t" 229 "sw %[temp5], 20(%[dst]) \n\t" 230 "sw %[temp6], 24(%[dst]) \n\t" 231 "sw %[temp7], 28(%[dst]) \n\t" 232 "bne %[src], %[loop_end], 1b \n\t" 233 " addiu %[dst], %[dst], 32 \n\t" 234 ".set pop \n\t" 235 236 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 237 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 238 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 239 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), 240 [loop_end]"=&r"(loop_end), [src]"+r"(buf1), 241 [dst]"+r"(buf2) 242 : 243 : "memory" 244 ); 245 } 246 } 247 } 248 249 // buffer update 250 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 251 ac->fdsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64); 252 ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64); 253 ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64); 254 { 255 float *buf1 = buf + 7*128 + 64; 256 float *buf2 = saved + 448; 257 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; 258 int loop_end; 259 260 /* loop unrolled 8 times */ 261 __asm__ volatile ( 262 ".set push \n\t" 263 ".set noreorder \n\t" 264 "addiu %[loop_end], %[src], 256 \n\t" 265 "1: \n\t" 266 "lw %[temp0], 0(%[src]) \n\t" 267 "lw %[temp1], 4(%[src]) \n\t" 268 "lw %[temp2], 8(%[src]) \n\t" 269 "lw %[temp3], 12(%[src]) \n\t" 270 "lw %[temp4], 16(%[src]) \n\t" 271 "lw %[temp5], 20(%[src]) \n\t" 272 "lw %[temp6], 24(%[src]) \n\t" 273 "lw %[temp7], 28(%[src]) \n\t" 274 "addiu %[src], %[src], 32 \n\t" 275 "sw %[temp0], 0(%[dst]) \n\t" 276 "sw %[temp1], 4(%[dst]) \n\t" 277 "sw %[temp2], 8(%[dst]) \n\t" 278 "sw %[temp3], 12(%[dst]) \n\t" 279 "sw %[temp4], 16(%[dst]) \n\t" 280 "sw %[temp5], 20(%[dst]) \n\t" 281 "sw %[temp6], 24(%[dst]) \n\t" 282 "sw %[temp7], 28(%[dst]) \n\t" 283 "bne %[src], %[loop_end], 1b \n\t" 284 " addiu %[dst], %[dst], 32 \n\t" 285 ".set pop \n\t" 286 287 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 288 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 289 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 290 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), 291 [loop_end]"=&r"(loop_end), [src]"+r"(buf1), 292 [dst]"+r"(buf2) 293 : 294 : "memory" 295 ); 296 } 297 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { 298 float *buf1 = buf + 512; 299 float *buf2 = saved; 300 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; 301 int loop_end; 302 303 /* loop unrolled 8 times */ 304 __asm__ volatile ( 305 ".set push \n\t" 306 ".set noreorder \n\t" 307 "addiu %[loop_end], %[src], 1792 \n\t" 308 "1: \n\t" 309 "lw %[temp0], 0(%[src]) \n\t" 310 "lw %[temp1], 4(%[src]) \n\t" 311 "lw %[temp2], 8(%[src]) \n\t" 312 "lw %[temp3], 12(%[src]) \n\t" 313 "lw %[temp4], 16(%[src]) \n\t" 314 "lw %[temp5], 20(%[src]) \n\t" 315 "lw %[temp6], 24(%[src]) \n\t" 316 "lw %[temp7], 28(%[src]) \n\t" 317 "addiu %[src], %[src], 32 \n\t" 318 "sw %[temp0], 0(%[dst]) \n\t" 319 "sw %[temp1], 4(%[dst]) \n\t" 320 "sw %[temp2], 8(%[dst]) \n\t" 321 "sw %[temp3], 12(%[dst]) \n\t" 322 "sw %[temp4], 16(%[dst]) \n\t" 323 "sw %[temp5], 20(%[dst]) \n\t" 324 "sw %[temp6], 24(%[dst]) \n\t" 325 "sw %[temp7], 28(%[dst]) \n\t" 326 "bne %[src], %[loop_end], 1b \n\t" 327 " addiu %[dst], %[dst], 32 \n\t" 328 ".set pop \n\t" 329 330 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 331 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 332 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 333 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), 334 [loop_end]"=&r"(loop_end), [src]"+r"(buf1), 335 [dst]"+r"(buf2) 336 : 337 : "memory" 338 ); 339 { 340 float *buf1 = buf + 7*128 + 64; 341 float *buf2 = saved + 448; 342 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; 343 int loop_end; 344 345 /* loop unrolled 8 times */ 346 __asm__ volatile ( 347 ".set push \n\t" 348 ".set noreorder \n\t" 349 "addiu %[loop_end], %[src], 256 \n\t" 350 "1: \n\t" 351 "lw %[temp0], 0(%[src]) \n\t" 352 "lw %[temp1], 4(%[src]) \n\t" 353 "lw %[temp2], 8(%[src]) \n\t" 354 "lw %[temp3], 12(%[src]) \n\t" 355 "lw %[temp4], 16(%[src]) \n\t" 356 "lw %[temp5], 20(%[src]) \n\t" 357 "lw %[temp6], 24(%[src]) \n\t" 358 "lw %[temp7], 28(%[src]) \n\t" 359 "addiu %[src], %[src], 32 \n\t" 360 "sw %[temp0], 0(%[dst]) \n\t" 361 "sw %[temp1], 4(%[dst]) \n\t" 362 "sw %[temp2], 8(%[dst]) \n\t" 363 "sw %[temp3], 12(%[dst]) \n\t" 364 "sw %[temp4], 16(%[dst]) \n\t" 365 "sw %[temp5], 20(%[dst]) \n\t" 366 "sw %[temp6], 24(%[dst]) \n\t" 367 "sw %[temp7], 28(%[dst]) \n\t" 368 "bne %[src], %[loop_end], 1b \n\t" 369 " addiu %[dst], %[dst], 32 \n\t" 370 ".set pop \n\t" 371 372 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 373 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 374 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 375 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), 376 [loop_end]"=&r"(loop_end), [src]"+r"(buf1), 377 [dst]"+r"(buf2) 378 : 379 : "memory" 380 ); 381 } 382 } else { // LONG_STOP or ONLY_LONG 383 float *buf1 = buf + 512; 384 float *buf2 = saved; 385 int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; 386 int loop_end; 387 388 /* loop unrolled 8 times */ 389 __asm__ volatile ( 390 ".set push \n\t" 391 ".set noreorder \n\t" 392 "addiu %[loop_end], %[src], 2048 \n\t" 393 "1: \n\t" 394 "lw %[temp0], 0(%[src]) \n\t" 395 "lw %[temp1], 4(%[src]) \n\t" 396 "lw %[temp2], 8(%[src]) \n\t" 397 "lw %[temp3], 12(%[src]) \n\t" 398 "lw %[temp4], 16(%[src]) \n\t" 399 "lw %[temp5], 20(%[src]) \n\t" 400 "lw %[temp6], 24(%[src]) \n\t" 401 "lw %[temp7], 28(%[src]) \n\t" 402 "addiu %[src], %[src], 32 \n\t" 403 "sw %[temp0], 0(%[dst]) \n\t" 404 "sw %[temp1], 4(%[dst]) \n\t" 405 "sw %[temp2], 8(%[dst]) \n\t" 406 "sw %[temp3], 12(%[dst]) \n\t" 407 "sw %[temp4], 16(%[dst]) \n\t" 408 "sw %[temp5], 20(%[dst]) \n\t" 409 "sw %[temp6], 24(%[dst]) \n\t" 410 "sw %[temp7], 28(%[dst]) \n\t" 411 "bne %[src], %[loop_end], 1b \n\t" 412 " addiu %[dst], %[dst], 32 \n\t" 413 ".set pop \n\t" 414 415 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 416 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 417 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 418 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), 419 [loop_end]"=&r"(loop_end), [src]"+r"(buf1), 420 [dst]"+r"(buf2) 421 : 422 : "memory" 423 ); 424 } 425} 426 427static void apply_ltp_mips(AACContext *ac, SingleChannelElement *sce) 428{ 429 const LongTermPrediction *ltp = &sce->ics.ltp; 430 const uint16_t *offsets = sce->ics.swb_offset; 431 int i, sfb; 432 int j, k; 433 434 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) { 435 float *predTime = sce->ret; 436 float *predFreq = ac->buf_mdct; 437 float *p_predTime; 438 int16_t num_samples = 2048; 439 440 if (ltp->lag < 1024) 441 num_samples = ltp->lag + 1024; 442 j = (2048 - num_samples) >> 2; 443 k = (2048 - num_samples) & 3; 444 p_predTime = &predTime[num_samples]; 445 446 for (i = 0; i < num_samples; i++) 447 predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef; 448 for (i = 0; i < j; i++) { 449 450 /* loop unrolled 4 times */ 451 __asm__ volatile ( 452 "sw $0, 0(%[p_predTime]) \n\t" 453 "sw $0, 4(%[p_predTime]) \n\t" 454 "sw $0, 8(%[p_predTime]) \n\t" 455 "sw $0, 12(%[p_predTime]) \n\t" 456 "addiu %[p_predTime], %[p_predTime], 16 \n\t" 457 458 : [p_predTime]"+r"(p_predTime) 459 : 460 : "memory" 461 ); 462 } 463 for (i = 0; i < k; i++) { 464 465 __asm__ volatile ( 466 "sw $0, 0(%[p_predTime]) \n\t" 467 "addiu %[p_predTime], %[p_predTime], 4 \n\t" 468 469 : [p_predTime]"+r"(p_predTime) 470 : 471 : "memory" 472 ); 473 } 474 475 ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics); 476 477 if (sce->tns.present) 478 ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0); 479 480 for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++) 481 if (ltp->used[sfb]) 482 for (i = offsets[sfb]; i < offsets[sfb + 1]; i++) 483 sce->coeffs[i] += predFreq[i]; 484 } 485} 486 487#if HAVE_MIPSFPU 488static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce) 489{ 490 IndividualChannelStream *ics = &sce->ics; 491 float *saved = sce->saved; 492 float *saved_ltp = sce->coeffs; 493 const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; 494 const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; 495 int i; 496 int loop_end, loop_end1, loop_end2; 497 float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11; 498 499 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 500 float *buf = saved; 501 float *buf0 = saved_ltp; 502 float *p_saved_ltp = saved_ltp + 576; 503 float *ptr1 = &saved_ltp[512]; 504 float *ptr2 = &ac->buf_mdct[1023]; 505 float *ptr3 = (float*)&swindow[63]; 506 loop_end1 = (int)(p_saved_ltp + 448); 507 508 /* loop unrolled 8 times */ 509 __asm__ volatile ( 510 ".set push \n\t" 511 ".set noreorder \n\t" 512 "addiu %[loop_end], %[src], 2048 \n\t" 513 "1: \n\t" 514 "lw %[temp0], 0(%[src]) \n\t" 515 "lw %[temp1], 4(%[src]) \n\t" 516 "lw %[temp2], 8(%[src]) \n\t" 517 "lw %[temp3], 12(%[src]) \n\t" 518 "lw %[temp4], 16(%[src]) \n\t" 519 "lw %[temp5], 20(%[src]) \n\t" 520 "lw %[temp6], 24(%[src]) \n\t" 521 "lw %[temp7], 28(%[src]) \n\t" 522 "addiu %[src], %[src], 32 \n\t" 523 "sw %[temp0], 0(%[dst]) \n\t" 524 "sw %[temp1], 4(%[dst]) \n\t" 525 "sw %[temp2], 8(%[dst]) \n\t" 526 "sw %[temp3], 12(%[dst]) \n\t" 527 "sw %[temp4], 16(%[dst]) \n\t" 528 "sw %[temp5], 20(%[dst]) \n\t" 529 "sw %[temp6], 24(%[dst]) \n\t" 530 "sw %[temp7], 28(%[dst]) \n\t" 531 "bne %[src], %[loop_end], 1b \n\t" 532 " addiu %[dst], %[dst], 32 \n\t" 533 ".set pop \n\t" 534 535 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 536 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 537 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 538 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), 539 [loop_end]"=&r"(loop_end), [src]"+r"(buf), 540 [dst]"+r"(buf0) 541 : 542 : "memory" 543 ); 544 545 /* loop unrolled 8 times */ 546 __asm__ volatile ( 547 "1: \n\t" 548 "sw $0, 0(%[p_saved_ltp]) \n\t" 549 "sw $0, 4(%[p_saved_ltp]) \n\t" 550 "sw $0, 8(%[p_saved_ltp]) \n\t" 551 "sw $0, 12(%[p_saved_ltp]) \n\t" 552 "sw $0, 16(%[p_saved_ltp]) \n\t" 553 "sw $0, 20(%[p_saved_ltp]) \n\t" 554 "sw $0, 24(%[p_saved_ltp]) \n\t" 555 "sw $0, 28(%[p_saved_ltp]) \n\t" 556 "addiu %[p_saved_ltp], %[p_saved_ltp], 32 \n\t" 557 "bne %[p_saved_ltp], %[loop_end1], 1b \n\t" 558 559 : [p_saved_ltp]"+r"(p_saved_ltp) 560 : [loop_end1]"r"(loop_end1) 561 : "memory" 562 ); 563 564 ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); 565 for (i = 0; i < 16; i++){ 566 /* loop unrolled 4 times */ 567 __asm__ volatile ( 568 "lwc1 %[temp0], 0(%[ptr2]) \n\t" 569 "lwc1 %[temp1], -4(%[ptr2]) \n\t" 570 "lwc1 %[temp2], -8(%[ptr2]) \n\t" 571 "lwc1 %[temp3], -12(%[ptr2]) \n\t" 572 "lwc1 %[temp4], 0(%[ptr3]) \n\t" 573 "lwc1 %[temp5], -4(%[ptr3]) \n\t" 574 "lwc1 %[temp6], -8(%[ptr3]) \n\t" 575 "lwc1 %[temp7], -12(%[ptr3]) \n\t" 576 "mul.s %[temp8], %[temp0], %[temp4] \n\t" 577 "mul.s %[temp9], %[temp1], %[temp5] \n\t" 578 "mul.s %[temp10], %[temp2], %[temp6] \n\t" 579 "mul.s %[temp11], %[temp3], %[temp7] \n\t" 580 "swc1 %[temp8], 0(%[ptr1]) \n\t" 581 "swc1 %[temp9], 4(%[ptr1]) \n\t" 582 "swc1 %[temp10], 8(%[ptr1]) \n\t" 583 "swc1 %[temp11], 12(%[ptr1]) \n\t" 584 "addiu %[ptr1], %[ptr1], 16 \n\t" 585 "addiu %[ptr2], %[ptr2], -16 \n\t" 586 "addiu %[ptr3], %[ptr3], -16 \n\t" 587 588 : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), 589 [temp2]"=&f"(temp2), [temp3]"=&f"(temp3), 590 [temp4]"=&f"(temp4), [temp5]"=&f"(temp5), 591 [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), 592 [temp8]"=&f"(temp8), [temp9]"=&f"(temp9), 593 [temp10]"=&f"(temp10), [temp11]"=&f"(temp11), 594 [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2), [ptr3]"+r"(ptr3) 595 : 596 : "memory" 597 ); 598 } 599 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { 600 float *buff0 = saved; 601 float *buff1 = saved_ltp; 602 float *ptr1 = &saved_ltp[512]; 603 float *ptr2 = &ac->buf_mdct[1023]; 604 float *ptr3 = (float*)&swindow[63]; 605 loop_end = (int)(saved + 448); 606 607 /* loop unrolled 8 times */ 608 __asm__ volatile ( 609 ".set push \n\t" 610 ".set noreorder \n\t" 611 "1: \n\t" 612 "lw %[temp0], 0(%[src]) \n\t" 613 "lw %[temp1], 4(%[src]) \n\t" 614 "lw %[temp2], 8(%[src]) \n\t" 615 "lw %[temp3], 12(%[src]) \n\t" 616 "lw %[temp4], 16(%[src]) \n\t" 617 "lw %[temp5], 20(%[src]) \n\t" 618 "lw %[temp6], 24(%[src]) \n\t" 619 "lw %[temp7], 28(%[src]) \n\t" 620 "addiu %[src], %[src], 32 \n\t" 621 "sw %[temp0], 0(%[dst]) \n\t" 622 "sw %[temp1], 4(%[dst]) \n\t" 623 "sw %[temp2], 8(%[dst]) \n\t" 624 "sw %[temp3], 12(%[dst]) \n\t" 625 "sw %[temp4], 16(%[dst]) \n\t" 626 "sw %[temp5], 20(%[dst]) \n\t" 627 "sw %[temp6], 24(%[dst]) \n\t" 628 "sw %[temp7], 28(%[dst]) \n\t" 629 "sw $0, 2304(%[dst]) \n\t" 630 "sw $0, 2308(%[dst]) \n\t" 631 "sw $0, 2312(%[dst]) \n\t" 632 "sw $0, 2316(%[dst]) \n\t" 633 "sw $0, 2320(%[dst]) \n\t" 634 "sw $0, 2324(%[dst]) \n\t" 635 "sw $0, 2328(%[dst]) \n\t" 636 "sw $0, 2332(%[dst]) \n\t" 637 "bne %[src], %[loop_end], 1b \n\t" 638 " addiu %[dst], %[dst], 32 \n\t" 639 ".set pop \n\t" 640 641 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 642 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 643 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 644 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), 645 [src]"+r"(buff0), [dst]"+r"(buff1) 646 : [loop_end]"r"(loop_end) 647 : "memory" 648 ); 649 ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960, &swindow[64], 64); 650 for (i = 0; i < 16; i++){ 651 /* loop unrolled 8 times */ 652 __asm__ volatile ( 653 "lwc1 %[temp0], 0(%[ptr2]) \n\t" 654 "lwc1 %[temp1], -4(%[ptr2]) \n\t" 655 "lwc1 %[temp2], -8(%[ptr2]) \n\t" 656 "lwc1 %[temp3], -12(%[ptr2]) \n\t" 657 "lwc1 %[temp4], 0(%[ptr3]) \n\t" 658 "lwc1 %[temp5], -4(%[ptr3]) \n\t" 659 "lwc1 %[temp6], -8(%[ptr3]) \n\t" 660 "lwc1 %[temp7], -12(%[ptr3]) \n\t" 661 "mul.s %[temp8], %[temp0], %[temp4] \n\t" 662 "mul.s %[temp9], %[temp1], %[temp5] \n\t" 663 "mul.s %[temp10], %[temp2], %[temp6] \n\t" 664 "mul.s %[temp11], %[temp3], %[temp7] \n\t" 665 "swc1 %[temp8], 0(%[ptr1]) \n\t" 666 "swc1 %[temp9], 4(%[ptr1]) \n\t" 667 "swc1 %[temp10], 8(%[ptr1]) \n\t" 668 "swc1 %[temp11], 12(%[ptr1]) \n\t" 669 "addiu %[ptr1], %[ptr1], 16 \n\t" 670 "addiu %[ptr2], %[ptr2], -16 \n\t" 671 "addiu %[ptr3], %[ptr3], -16 \n\t" 672 673 : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), 674 [temp2]"=&f"(temp2), [temp3]"=&f"(temp3), 675 [temp4]"=&f"(temp4), [temp5]"=&f"(temp5), 676 [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), 677 [temp8]"=&f"(temp8), [temp9]"=&f"(temp9), 678 [temp10]"=&f"(temp10), [temp11]"=&f"(temp11), 679 [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2), [ptr3]"+r"(ptr3) 680 : 681 : "memory" 682 ); 683 } 684 } else { // LONG_STOP or ONLY_LONG 685 float *ptr1, *ptr2, *ptr3; 686 ac->fdsp.vector_fmul_reverse(saved_ltp, ac->buf_mdct + 512, &lwindow[512], 512); 687 688 ptr1 = &saved_ltp[512]; 689 ptr2 = &ac->buf_mdct[1023]; 690 ptr3 = (float*)&lwindow[511]; 691 692 for (i = 0; i < 512; i+=4){ 693 /* loop unrolled 4 times */ 694 __asm__ volatile ( 695 "lwc1 %[temp0], 0(%[ptr2]) \n\t" 696 "lwc1 %[temp1], -4(%[ptr2]) \n\t" 697 "lwc1 %[temp2], -8(%[ptr2]) \n\t" 698 "lwc1 %[temp3], -12(%[ptr2]) \n\t" 699 "lwc1 %[temp4], 0(%[ptr3]) \n\t" 700 "lwc1 %[temp5], -4(%[ptr3]) \n\t" 701 "lwc1 %[temp6], -8(%[ptr3]) \n\t" 702 "lwc1 %[temp7], -12(%[ptr3]) \n\t" 703 "mul.s %[temp8], %[temp0], %[temp4] \n\t" 704 "mul.s %[temp9], %[temp1], %[temp5] \n\t" 705 "mul.s %[temp10], %[temp2], %[temp6] \n\t" 706 "mul.s %[temp11], %[temp3], %[temp7] \n\t" 707 "swc1 %[temp8], 0(%[ptr1]) \n\t" 708 "swc1 %[temp9], 4(%[ptr1]) \n\t" 709 "swc1 %[temp10], 8(%[ptr1]) \n\t" 710 "swc1 %[temp11], 12(%[ptr1]) \n\t" 711 "addiu %[ptr1], %[ptr1], 16 \n\t" 712 "addiu %[ptr2], %[ptr2], -16 \n\t" 713 "addiu %[ptr3], %[ptr3], -16 \n\t" 714 715 : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), 716 [temp2]"=&f"(temp2), [temp3]"=&f"(temp3), 717 [temp4]"=&f"(temp4), [temp5]"=&f"(temp5), 718 [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), 719 [temp8]"=&f"(temp8), [temp9]"=&f"(temp9), 720 [temp10]"=&f"(temp10), [temp11]"=&f"(temp11), 721 [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2), 722 [ptr3]"+r"(ptr3) 723 : 724 : "memory" 725 ); 726 } 727 } 728 729 { 730 float *buf1 = sce->ltp_state+1024; 731 float *buf2 = sce->ltp_state; 732 float *buf3 = sce->ret; 733 float *buf4 = sce->ltp_state+1024; 734 float *buf5 = saved_ltp; 735 float *buf6 = sce->ltp_state+2048; 736 737 /* loops unrolled 8 times */ 738 __asm__ volatile ( 739 ".set push \n\t" 740 ".set noreorder \n\t" 741 "addiu %[loop_end], %[src], 4096 \n\t" 742 "addiu %[loop_end1], %[src1], 4096 \n\t" 743 "addiu %[loop_end2], %[src2], 4096 \n\t" 744 "1: \n\t" 745 "lw %[temp0], 0(%[src]) \n\t" 746 "lw %[temp1], 4(%[src]) \n\t" 747 "lw %[temp2], 8(%[src]) \n\t" 748 "lw %[temp3], 12(%[src]) \n\t" 749 "lw %[temp4], 16(%[src]) \n\t" 750 "lw %[temp5], 20(%[src]) \n\t" 751 "lw %[temp6], 24(%[src]) \n\t" 752 "lw %[temp7], 28(%[src]) \n\t" 753 "addiu %[src], %[src], 32 \n\t" 754 "sw %[temp0], 0(%[dst]) \n\t" 755 "sw %[temp1], 4(%[dst]) \n\t" 756 "sw %[temp2], 8(%[dst]) \n\t" 757 "sw %[temp3], 12(%[dst]) \n\t" 758 "sw %[temp4], 16(%[dst]) \n\t" 759 "sw %[temp5], 20(%[dst]) \n\t" 760 "sw %[temp6], 24(%[dst]) \n\t" 761 "sw %[temp7], 28(%[dst]) \n\t" 762 "bne %[src], %[loop_end], 1b \n\t" 763 " addiu %[dst], %[dst], 32 \n\t" 764 "2: \n\t" 765 "lw %[temp0], 0(%[src1]) \n\t" 766 "lw %[temp1], 4(%[src1]) \n\t" 767 "lw %[temp2], 8(%[src1]) \n\t" 768 "lw %[temp3], 12(%[src1]) \n\t" 769 "lw %[temp4], 16(%[src1]) \n\t" 770 "lw %[temp5], 20(%[src1]) \n\t" 771 "lw %[temp6], 24(%[src1]) \n\t" 772 "lw %[temp7], 28(%[src1]) \n\t" 773 "addiu %[src1], %[src1], 32 \n\t" 774 "sw %[temp0], 0(%[dst1]) \n\t" 775 "sw %[temp1], 4(%[dst1]) \n\t" 776 "sw %[temp2], 8(%[dst1]) \n\t" 777 "sw %[temp3], 12(%[dst1]) \n\t" 778 "sw %[temp4], 16(%[dst1]) \n\t" 779 "sw %[temp5], 20(%[dst1]) \n\t" 780 "sw %[temp6], 24(%[dst1]) \n\t" 781 "sw %[temp7], 28(%[dst1]) \n\t" 782 "bne %[src1], %[loop_end1], 2b \n\t" 783 " addiu %[dst1], %[dst1], 32 \n\t" 784 "3: \n\t" 785 "lw %[temp0], 0(%[src2]) \n\t" 786 "lw %[temp1], 4(%[src2]) \n\t" 787 "lw %[temp2], 8(%[src2]) \n\t" 788 "lw %[temp3], 12(%[src2]) \n\t" 789 "lw %[temp4], 16(%[src2]) \n\t" 790 "lw %[temp5], 20(%[src2]) \n\t" 791 "lw %[temp6], 24(%[src2]) \n\t" 792 "lw %[temp7], 28(%[src2]) \n\t" 793 "addiu %[src2], %[src2], 32 \n\t" 794 "sw %[temp0], 0(%[dst2]) \n\t" 795 "sw %[temp1], 4(%[dst2]) \n\t" 796 "sw %[temp2], 8(%[dst2]) \n\t" 797 "sw %[temp3], 12(%[dst2]) \n\t" 798 "sw %[temp4], 16(%[dst2]) \n\t" 799 "sw %[temp5], 20(%[dst2]) \n\t" 800 "sw %[temp6], 24(%[dst2]) \n\t" 801 "sw %[temp7], 28(%[dst2]) \n\t" 802 "bne %[src2], %[loop_end2], 3b \n\t" 803 " addiu %[dst2], %[dst2], 32 \n\t" 804 ".set pop \n\t" 805 806 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 807 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 808 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 809 [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), 810 [loop_end]"=&r"(loop_end), [loop_end1]"=&r"(loop_end1), 811 [loop_end2]"=&r"(loop_end2), [src]"+r"(buf1), 812 [dst]"+r"(buf2), [src1]"+r"(buf3), [dst1]"+r"(buf4), 813 [src2]"+r"(buf5), [dst2]"+r"(buf6) 814 : 815 : "memory" 816 ); 817 } 818} 819#endif /* HAVE_MIPSFPU */ 820#endif /* HAVE_INLINE_ASM */ 821 822void ff_aacdec_init_mips(AACContext *c) 823{ 824#if HAVE_INLINE_ASM 825 c->imdct_and_windowing = imdct_and_windowing_mips; 826 c->apply_ltp = apply_ltp_mips; 827#if HAVE_MIPSFPU 828 c->update_ltp = update_ltp_mips; 829#endif /* HAVE_MIPSFPU */ 830#endif /* HAVE_INLINE_ASM */ 831} 832