;******************************************************************************
;* x86-optimized functions for gradfun filter
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; 0x7F in every 16-bit lane; subtracted from the scaled |delta| in FILTER_LINE.
pw_7f: times 8 dw 0x7F
; 0x00FF in every 16-bit lane; isolates the low byte of each word in BLUR_LINE.
pw_ff: times 8 dw 0xFF

SECTION .text

;------------------------------------------------------------------------------
; FILTER_LINE addend
;
; Filters one group of pixels (as many bytes as movh loads) located at byte
; offset r0 and writes the resulting bytes to [r1+r0].
;
;   %1 : register with a 16-bit per-lane value that is added to the scaled
;        pixel before the final shift (the callers load it from [r5];
;        presumably a dither pattern -- confirm against the C caller)
;
; Register contract (set up by the functions that expand this macro):
;   r0 = offset applied to r1, r2 and r3
;   r1 = output bytes          r2 = input bytes
;   r3 = 16-bit reference values, each one shared by two adjacent pixels
;   m5 = 16-bit factor replicated in every lane
;   m6 = pw_7f                 m7 = all zeroes
; Writes m0, m1 and m2.
;------------------------------------------------------------------------------
%macro FILTER_LINE 1
    movh        m0, [r2+r0]         ; input pixels (bytes)
    movh        m1, [r3+r0]         ; 16-bit reference values
    punpcklbw   m0, m7              ; widen pixels to words (m7 == 0)
    punpcklwd   m1, m1              ; repeat each reference word for a pixel pair
    psllw       m0, 7               ; pix << 7
    psubw       m1, m0              ; delta = ref - (pix << 7)
    PABSW       m2, m1              ; m2 = |delta|
    pmulhuw     m2, m5              ; m2 = (|delta| * factor) >> 16, unsigned
    psubw       m2, m6              ; m2 -= 0x7F
    pminsw      m2, m7              ; m2 = min(m2, 0), signed
    pmullw      m2, m2              ; m2 = m2 * m2 (low 16 bits)
    psllw       m1, 2               ; delta << 2
    paddw       m0, %1              ; (pix << 7) + addend
    pmulhw      m1, m2              ; ((delta << 2) * m2) >> 16, signed
    paddw       m0, m1              ; apply the weighted correction
    psraw       m0, 7               ; undo the << 7 scaling
    packuswb    m0, m0              ; saturate words to unsigned bytes
    movh   [r1+r0], m0              ; store the filtered pixels
%endmacro

;------------------------------------------------------------------------------
; gradfun_filter_line, MMX-register version (mmxext); 6 arguments, 6 GPRs
;
;   r0  : offset, advanced by 4 after every FILTER_LINE; the loop continues
;         while it is still negative, and the body always runs at least once
;   r1  : output bytes
;   r2  : input bytes
;   r3  : 16-bit reference values
;   r4d : 16-bit factor, broadcast to all four word lanes of m5
;   r5  : eight 16-bit addends; words 0-3 are used for the first group of four
;         pixels of each iteration and words 4-7 for the second group
;
; NOTE(review): argument roles are read off the register usage below; confirm
; the names and types against the C prototype.
;------------------------------------------------------------------------------
INIT_MMX mmxext
cglobal gradfun_filter_line, 6, 6
    ; constants expected by FILTER_LINE
    pxor        m7, m7
    mova        m6, [pw_7f]
    ; replicate the low word of r4d across m5
    movh        m5, r4d
    pshufw      m5, m5, 0
    ; the two halves of the 16-byte addend table
    mova        m3, [r5]
    mova        m4, [r5+8]
.next_group:
    FILTER_LINE m3
    add         r0, 4
    jge         .done               ; offset reached zero after the first half
    FILTER_LINE m4
    add         r0, 4
    jl          .next_group
.done:
    REP_RET

;------------------------------------------------------------------------------
; gradfun_filter_line, XMM-register version (ssse3); 6 arguments, 6 GPRs,
; 8 vector registers
;
; Same register roles as the MMX-register version, except that one FILTER_LINE
; covers eight pixels, so r0 advances by 8 and all eight addend words at [r5]
; are applied on every iteration.
;------------------------------------------------------------------------------
INIT_XMM ssse3
cglobal gradfun_filter_line, 6, 6, 8
    ; constants expected by FILTER_LINE
    pxor        m7, m7
    mova        m6, [pw_7f]
    ; replicate the low word of r4d across all eight lanes of m5
    movd        m5, r4d
    pshuflw     m5, m5, 0
    punpcklqdq  m5, m5
    ; eight 16-bit addends
    mova        m4, [r5]
.next_group:
    FILTER_LINE m4
    add         r0, 8
    jl          .next_group
    REP_RET

;------------------------------------------------------------------------------
; BLUR_LINE load_insn
;
; Emits the function gradfun_blur_line_<load_insn>; <load_insn> is the
; instruction used for the two 16-byte loads from r4 and r5 (instantiated
; below with movdqa and movdqu).
;
;   r0 : byte offset applied to every pointer, advanced by 16 per iteration;
;        the loop continues while it is still negative, and the body always
;        runs at least once
;   r1 : 16-bit buffer, read and then overwritten
;   r2 : 16-bit buffer, read only
;   r3 : 16-bit output
;   r4 : first byte input
;   r5 : second byte input
;
; For every word lane i of an iteration:
;   sum   = r4[2i] + r4[2i+1] + r5[2i] + r5[2i+1] + r2[i]
;   old   = r1[i]
;   r1[i] = sum
;   r3[i] = sum - old
;
; r1, r2 and r3 are accessed with aligned instructions (mova, and paddw with a
; memory operand).
;------------------------------------------------------------------------------
%macro BLUR_LINE 1
cglobal gradfun_blur_line_%1, 6, 6, 8
    mova        m7, [pw_ff]         ; low-byte mask for each word
.next_block:
    %1          m0, [r4+r0]         ; 16 bytes of the first input
    %1          m1, [r5+r0]         ; 16 bytes of the second input
    mova        m2, m0
    mova        m3, m1
    psrlw       m0, 8               ; odd-indexed bytes of the first input
    psrlw       m1, 8               ; odd-indexed bytes of the second input
    pand        m2, m7              ; even-indexed bytes of the first input
    pand        m3, m7              ; even-indexed bytes of the second input
    paddw       m0, m1              ; odd + odd
    paddw       m2, m3              ; even + even
    paddw       m0, m2              ; sum of the four bytes per word lane
    paddw       m0, [r2+r0]         ; add the words from r2
    mova        m1, [r1+r0]         ; previous contents of r1
    mova   [r1+r0], m0              ; replace them with the new sum
    psubw       m0, m1              ; new sum - previous value
    mova   [r3+r0], m0              ; write the difference to r3
    add         r0, 16
    jl          .next_block
    REP_RET
%endmacro

INIT_XMM sse2
BLUR_LINE movdqa
BLUR_LINE movdqu