1/* 2 * Copyright (c) 2012 3 * MIPS Technologies, Inc., California. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its 14 * contributors may be used to endorse or promote products derived from 15 * this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * Author: Bojan Zivkovic (bojan@mips.com) 30 * 31 * IIR filter optimized for MIPS floating-point architecture 32 * 33 * This file is part of FFmpeg. 34 * 35 * FFmpeg is free software; you can redistribute it and/or 36 * modify it under the terms of the GNU Lesser General Public 37 * License as published by the Free Software Foundation; either 38 * version 2.1 of the License, or (at your option) any later version. 39 * 40 * FFmpeg is distributed in the hope that it will be useful, 41 * but WITHOUT ANY WARRANTY; without even the implied warranty of 42 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 43 * Lesser General Public License for more details. 44 * 45 * You should have received a copy of the GNU Lesser General Public 46 * License along with FFmpeg; if not, write to the Free Software 47 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 48 */ 49 50 /** 51 * @file 52 * Reference: libavcodec/iirfilter.c 53 */ 54 55#include "libavcodec/iirfilter.h" 56 57#if HAVE_INLINE_ASM 58typedef struct FFIIRFilterCoeffs { 59 int order; 60 float gain; 61 int *cx; 62 float *cy; 63} FFIIRFilterCoeffs; 64 65typedef struct FFIIRFilterState { 66 float x[1]; 67} FFIIRFilterState; 68 69static void ff_iir_filter_flt_mips(const struct FFIIRFilterCoeffs *c, 70 struct FFIIRFilterState *s, int size, 71 const float *src, int sstep, float *dst, int dstep) 72{ 73 if (c->order == 2) { 74 int i; 75 const float *src0 = src; 76 float *dst0 = dst; 77 for (i = 0; i < size; i++) { 78 float in = *src0 * c->gain + s->x[0] * c->cy[0] + s->x[1] * c->cy[1]; 79 *dst0 = s->x[0] + in + s->x[1] * c->cx[1]; 80 s->x[0] = s->x[1]; 81 s->x[1] = in; 82 src0 += sstep; 83 dst0 += dstep; 84 } 85 } else if (c->order == 4) { 86 int i; 87 const float *src0 = src; 88 float *dst0 = dst; 89 float four = 4.0; 90 float six = 6.0; 91 for (i = 0; i < size; i += 4) { 92 float in1, in2, in3, in4; 93 float res1, res2, res3, res4; 94 float *x = s->x; 95 float *cy = c->cy; 96 float gain = c->gain; 97 float src0_0 = src0[0 ]; 98 float src0_1 = src0[sstep ]; 99 float src0_2 = src0[2*sstep]; 100 float src0_3 = src0[3*sstep]; 101 102 __asm__ volatile ( 103 "lwc1 $f0, 0(%[cy]) \n\t" 104 "lwc1 $f4, 0(%[x]) \n\t" 105 "lwc1 $f5, 4(%[x]) \n\t" 106 "lwc1 $f6, 8(%[x]) \n\t" 107 "lwc1 $f7, 12(%[x]) \n\t" 108 "mul.s %[in1], %[src0_0], %[gain] \n\t" 109 "mul.s %[in2], %[src0_1], %[gain] \n\t" 110 "mul.s %[in3], %[src0_2], %[gain] \n\t" 111 "mul.s %[in4], %[src0_3], %[gain] \n\t" 112 "lwc1 $f1, 4(%[cy]) \n\t" 113 "madd.s %[in1], %[in1], $f0, $f4 \n\t" 114 "madd.s %[in2], %[in2], $f0, $f5 \n\t" 115 "madd.s %[in3], %[in3], $f0, $f6 \n\t" 116 "madd.s %[in4], %[in4], $f0, $f7 \n\t" 117 "lwc1 $f2, 8(%[cy]) \n\t" 118 "madd.s %[in1], %[in1], $f1, $f5 \n\t" 119 "madd.s %[in2], %[in2], $f1, $f6 \n\t" 120 "madd.s %[in3], %[in3], $f1, $f7 \n\t" 121 "lwc1 $f3, 12(%[cy]) \n\t" 122 "add.s $f8, $f5, $f7 \n\t" 123 "madd.s %[in1], %[in1], $f2, $f6 \n\t" 124 "madd.s %[in2], %[in2], $f2, $f7 \n\t" 125 "mul.s $f9, $f6, %[six] \n\t" 126 "mul.s $f10, $f7, %[six] \n\t" 127 "madd.s %[in1], %[in1], $f3, $f7 \n\t" 128 "madd.s %[in2], %[in2], $f3, %[in1] \n\t" 129 "madd.s %[in3], %[in3], $f2, %[in1] \n\t" 130 "madd.s %[in4], %[in4], $f1, %[in1] \n\t" 131 "add.s %[res1], $f4, %[in1] \n\t" 132 "swc1 %[in1], 0(%[x]) \n\t" 133 "add.s $f0, $f6, %[in1] \n\t" 134 "madd.s %[in3], %[in3], $f3, %[in2] \n\t" 135 "madd.s %[in4], %[in4], $f2, %[in2] \n\t" 136 "add.s %[res2], $f5, %[in2] \n\t" 137 "madd.s %[res1], %[res1], $f8, %[four] \n\t" 138 "add.s $f8, $f7, %[in2] \n\t" 139 "swc1 %[in2], 4(%[x]) \n\t" 140 "madd.s %[in4], %[in4], $f3, %[in3] \n\t" 141 "add.s %[res3], $f6, %[in3] \n\t" 142 "add.s %[res1], %[res1], $f9 \n\t" 143 "madd.s %[res2], %[res2], $f0, %[four] \n\t" 144 "swc1 %[in3], 8(%[x]) \n\t" 145 "add.s %[res4], $f7, %[in4] \n\t" 146 "madd.s %[res3], %[res3], $f8, %[four] \n\t" 147 "swc1 %[in4], 12(%[x]) \n\t" 148 "add.s %[res2], %[res2], $f10 \n\t" 149 "add.s $f8, %[in1], %[in3] \n\t" 150 "madd.s %[res3], %[res3], %[in1], %[six] \n\t" 151 "madd.s %[res4], %[res4], $f8, %[four] \n\t" 152 "madd.s %[res4], %[res4], %[in2], %[six] \n\t" 153 154 : [in1]"=&f"(in1), [in2]"=&f"(in2), 155 [in3]"=&f"(in3), [in4]"=&f"(in4), 156 [res1]"=&f"(res1), [res2]"=&f"(res2), 157 [res3]"=&f"(res3), [res4]"=&f"(res4) 158 : [src0_0]"f"(src0_0), [src0_1]"f"(src0_1), 159 [src0_2]"f"(src0_2), [src0_3]"f"(src0_3), 160 [gain]"f"(gain), [x]"r"(x), [cy]"r"(cy), 161 [four]"f"(four), [six]"f"(six) 162 : "$f0", "$f1", "$f2", "$f3", 163 "$f4", "$f5", "$f6", "$f7", 164 "$f8", "$f9", "$f10", 165 "memory" 166 ); 167 168 dst0[0 ] = res1; 169 dst0[sstep ] = res2; 170 dst0[2*sstep] = res3; 171 dst0[3*sstep] = res4; 172 173 src0 += 4*sstep; 174 dst0 += 4*dstep; 175 } 176 } else { 177 int i; 178 const float *src0 = src; 179 float *dst0 = dst; 180 for (i = 0; i < size; i++) { 181 int j; 182 float in, res; 183 in = *src0 * c->gain; 184 for(j = 0; j < c->order; j++) 185 in += c->cy[j] * s->x[j]; 186 res = s->x[0] + in + s->x[c->order >> 1] * c->cx[c->order >> 1]; 187 for(j = 1; j < c->order >> 1; j++) 188 res += (s->x[j] + s->x[c->order - j]) * c->cx[j]; 189 for(j = 0; j < c->order - 1; j++) 190 s->x[j] = s->x[j + 1]; 191 *dst0 = res; 192 s->x[c->order - 1] = in; 193 src0 += sstep; 194 dst0 += dstep; 195 } 196 } 197} 198#endif /* HAVE_INLINE_ASM */ 199 200void ff_iir_filter_init_mips(FFIIRFilterContext *f) { 201#if HAVE_INLINE_ASM 202 f->filter_flt = ff_iir_filter_flt_mips; 203#endif /* HAVE_INLINE_ASM */ 204} 205