;******************************************************************************
;* Copyright (c) 2012 Loren Merritt
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

;------------------------------------------------------------------------------
; LOWPASS prevsample, cursample, lut
;   %1 = %2 + lut[(%1 - %2) >> (8 - lut_bits)]
;
; All three arguments are register names without a size suffix. The lut is
; indexed as an array of signed 16-bit words; lut_bits is the assemble-time
; symbol assigned by HQDN3D_ROW. Clobbers flags.
;
; The result is produced with full-width (q) operations on purpose: the output
; of one LOWPASS is consumed as a q-sized operand by the sub/sar/index of a
; later LOWPASS. Writing it through the d-sized register would zero-extend on
; x86_64, so a negative sum would turn into a huge positive value and the next
; table lookup would land far outside the lut. On x86_32 the q suffix already
; names the 32-bit register, so nothing changes there.
;------------------------------------------------------------------------------
%macro LOWPASS 3 ; prevsample, cursample, lut
    sub    %1q, %2q                 ; signed difference prev - cur
%if lut_bits != 8
    sar    %1q, 8-lut_bits          ; arithmetic shift keeps the sign of the difference
%endif
    movsx  %1q, word [%3q+%1q*2]    ; signed 16-bit lut entry, sign-extended to full width
    add    %1q, %2q
%endmacro

;------------------------------------------------------------------------------
; LOAD dstreg, x, bitdepth
;   Zero-extending load of the sample at index x from srcq (bytes for
;   bitdepth 8, words otherwise). For depths below 16 the sample is then
;   shifted left by (16 - bitdepth) and biased by (1 << (15 - bitdepth)) - 1.
;   Clobbers flags when bitdepth != 16.
;------------------------------------------------------------------------------
%macro LOAD 3 ; dstreg, x, bitdepth
%if %3 == 8
    movzx  %1, byte [srcq+%2]
%else
    movzx  %1, word [srcq+(%2)*2]
%endif
%if %3 != 16
    shl    %1, 16-%3
    add    %1, (1<<(15-%3))-1
%endif
%endmacro

;------------------------------------------------------------------------------
; HQDN3D_ROW bitdepth
;   Emits hqdn3d_row_<bitdepth>_x86(src, dst, lineant, frameant, width,
;                                   spatial, temporal)
;
;   src/dst hold bytedepth-sized samples; lineant and frameant are accessed as
;   16-bit words and are updated in place; spatial and temporal are the luts
;   handed to LOWPASS.
;
;   The code assumes width >= 1 (it is decremented and negated without any
;   check).
;------------------------------------------------------------------------------
%macro HQDN3D_ROW 1 ; bitdepth
%if ARCH_X86_64
cglobal hqdn3d_row_%1_x86, 7,10,0, src, dst, lineant, frameant, width, spatial, temporal, pixelant, t0, t1
%else
cglobal hqdn3d_row_%1_x86, 7,7,0, src, dst, lineant, frameant, width, spatial, temporal
%endif
    %assign bytedepth (%1+7)>>3
    %assign lut_bits 4+4*(%1/16)
    ; Point every array at its last element and negate the count, so that a
    ; single index x runs upwards from -(width-1) to 0.
    dec    widthq
    lea    srcq, [srcq+widthq*bytedepth]
    lea    dstq, [dstq+widthq*bytedepth]
    lea    frameantq, [frameantq+widthq*2]
    lea    lineantq,  [lineantq+widthq*2]
    neg    widthq
    %define xq widthq
%if ARCH_X86_32
    ; Not enough registers: write the adjusted pointers back to their stack
    ; argument slots and let them all share r0, reloaded on demand by the
    ; movifnidn lines inside the loop.
    mov    dstmp, dstq
    mov    srcmp, srcq
    mov    frameantmp, frameantq
    mov    lineantmp,  lineantq
    %define dstq r0
    %define frameantq r0
    %define lineantq  r0
    %define pixelantq r1
    %define pixelantd r1d
    DECLARE_REG_TMP 2,3
%endif
    LOAD   pixelantd, xq, %1
ALIGN 16
.loop:
    movifnidn srcq, srcmp
    ; NOTE(review): when width == 1 the first pass through .loop is also the
    ; last one, so this preload still reads one sample past the row.
    LOAD   t0d, xq+1, %1 ; skip on the last iteration to avoid overread
.loop2:
    movifnidn lineantq, lineantmp
    movzx  t1d, word [lineantq+xq*2]
    LOWPASS t1, pixelant, spatial   ; t1 = lineant[x] filtered against pixelant
    mov    [lineantq+xq*2], t1w
    ; Advance pixelant using the preloaded sample x+1. When entered through
    ; .loop2 t0 is stale, but the resulting pixelant is never read again.
    LOWPASS pixelant, t0, spatial
    movifnidn frameantq, frameantmp
    movzx  t0d, word [frameantq+xq*2]
    LOWPASS t0, t1, temporal        ; t0 = frameant[x] filtered against t1
    mov    [frameantq+xq*2], t0w
    movifnidn dstq, dstmp
%if %1 != 16
    shr    t0d, 16-%1 ; could eliminate this by storing from t0h, but only with some constraints on register allocation
%endif
%if %1 == 8
    mov    [dstq+xq], t0b
%else
    mov    [dstq+xq*2], t0w
%endif
    inc    xq
    jl .loop                        ; x < 0: another sample with a successor to preload
    je .loop2                       ; x == 0: last sample, enter below the preload
    REP_RET
%endmacro ; HQDN3D_ROW

HQDN3D_ROW 8
HQDN3D_ROW 9
HQDN3D_ROW 10
HQDN3D_ROW 16