1;******************************************************************************
2;* x86-optimized functions for gradfun filter
3;*
4;* This file is part of FFmpeg.
5;*
6;* FFmpeg is free software; you can redistribute it and/or
7;* modify it under the terms of the GNU Lesser General Public
8;* License as published by the Free Software Foundation; either
9;* version 2.1 of the License, or (at your option) any later version.
10;*
11;* FFmpeg is distributed in the hope that it will be useful,
12;* but WITHOUT ANY WARRANTY; without even the implied warranty of
13;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14;* Lesser General Public License for more details.
15;*
16;* You should have received a copy of the GNU Lesser General Public
17;* License along with FFmpeg; if not, write to the Free Software
18;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19;******************************************************************************
20
21%include "libavutil/x86/x86util.asm"
22
SECTION_RODATA

; Eight 16-bit lanes of 0x007F. Both gradfun_filter_line versions load this
; into m6, and FILTER_LINE subtracts it from the scaled absolute difference.
pw_7f: times 8 dw 0x007F
; Eight 16-bit lanes of 0x00FF. BLUR_LINE loads this into m7 and uses it as a
; mask that keeps only the low byte of every word.
pw_ff: times 8 dw 0x00FF
27
28SECTION .text
29
;------------------------------------------------------------------------------
; FILTER_LINE dither
;
; Filters one movh-sized group of bytes located at byte offset r0.
;
; Expected on entry (set up by the functions that expand this macro):
;   %1  vector register that is added to the widened bytes before the final
;       shift (the callers pass values they loaded from [r5])
;   r0  byte offset shared by all three buffers below
;   r1  base of the byte buffer that receives the result
;   r2  base of the byte buffer that is filtered
;   r3  base of a buffer of 16-bit words; every loaded word is duplicated so
;       that it is paired with two adjacent bytes from [r2+r0]
;   m5  per-word multiplier (the callers broadcast r4d into every lane)
;   m6  pw_7f in every word
;   m7  all zero (used both for byte->word unpacking and as the min() bound)
; Clobbers: m0, m1, m2.
;
; Per 16-bit lane, with ordinary wrap-around word arithmetic:
;   pix   = byte << 7
;   delta = word - pix
;   m     = min(((|delta| * m5) >> 16) - 0x7F, 0)       ; unsigned high mul
;   m     = (m * m) & 0xFFFF
;   out   = pack_u8_sat((pix + dither + (((delta << 2) * m) >> 16)) >> 7)
;------------------------------------------------------------------------------
%macro FILTER_LINE 1
    ; Fetch both inputs and widen them to one 16-bit lane per output byte.
    movh       m1, [r3+r0]
    movh       m0, [r2+r0]
    punpcklwd  m1, m1               ; duplicate each word: one word per 2 bytes
    punpcklbw  m0, m7               ; zero-extend bytes to words
    psllw      m0, 7                ; pix = byte << 7

    ; delta must be formed from the undithered pix, so the dither add follows.
    psubw      m1, m0               ; delta = word - pix
    paddw      m0, %1               ; pix += dither

    ; Build the weight from |delta| before delta itself is pre-scaled.
    PABSW      m2, m1               ; m2 = |delta|
    psllw      m1, 2                ; delta <<= 2
    pmulhuw    m2, m5               ; (|delta| * m5) >> 16, unsigned
    psubw      m2, m6               ; ... - 0x7F
    pminsw     m2, m7               ; clamp to <= 0 (signed)
    pmullw     m2, m2               ; square, keep the low 16 bits

    ; Apply the weighted correction and narrow back to bytes.
    pmulhw     m1, m2               ; (delta * m) >> 16, signed
    paddw      m0, m1
    psraw      m0, 7
    packuswb   m0, m0               ; saturate to unsigned bytes
    movh  [r1+r0], m0
%endmacro
50
;------------------------------------------------------------------------------
; gradfun_filter_line, mmxext version (6 arguments in r0-r5).
;
;   r0       byte offset, advanced by 4 per FILTER_LINE; the loop keeps going
;            while the offset is still negative after the add, so the first
;            group is always processed
;   r1-r3    buffers consumed/produced by FILTER_LINE
;   r4d      value whose low 16 bits are broadcast to all words of m5
;   r5       pointer to 16 bytes: [r5] is used for the first group of each
;            pair, [r5+8] for the second
;------------------------------------------------------------------------------
INIT_MMX mmxext
cglobal gradfun_filter_line, 6, 6
    ; Loop-invariant registers required by FILTER_LINE.
    pxor      m7, m7                ; zero
    mova      m6, [pw_7f]
    mova      m3, [r5]              ; addend for even groups
    mova      m4, [r5+8]            ; addend for odd groups
    movh      m5, r4d
    pshufw    m5, m5, 0             ; replicate word 0 into all four lanes
.next_pair:
    FILTER_LINE m3
    add       r0, 4
    jge .done                       ; offset reached >= 0 after the even group
    FILTER_LINE m4
    add       r0, 4
    jl .next_pair
.done:
    REP_RET
68
;------------------------------------------------------------------------------
; gradfun_filter_line, ssse3 version (6 arguments in r0-r5, 8 xmm registers).
;
;   r0       byte offset, advanced by 8 per iteration; the loop keeps going
;            while the offset is still negative after the add, so at least one
;            group is always processed
;   r1-r3    buffers consumed/produced by FILTER_LINE
;   r4d      value whose low 16 bits are broadcast to all words of m5
;   r5       pointer to one full vector that is loaded with mova and used as
;            the FILTER_LINE addend
;------------------------------------------------------------------------------
INIT_XMM ssse3
cglobal gradfun_filter_line, 6, 6, 8
    ; Loop-invariant registers required by FILTER_LINE.
    pxor       m7, m7               ; zero
    mova       m6, [pw_7f]
    mova       m4, [r5]
    ; Broadcast the low word of r4d to all eight lanes of m5.
    movd       m5, r4d
    pshuflw    m5, m5, 0            ; fill the low four words
    punpcklqdq m5, m5               ; copy the low half into the high half
.next8:
    FILTER_LINE m4
    add        r0, 8
    jl .next8
    REP_RET
82
;------------------------------------------------------------------------------
; BLUR_LINE load_insn
;
; Emits the function gradfun_blur_line_<load_insn> (6 arguments in r0-r5,
; 8 vector registers). %1 is the instruction used to read the two byte rows,
; so the instantiations differ only in how [r4+r0] and [r5+r0] are loaded.
;
; Per iteration (16 bytes at byte offset r0), for every 16-bit lane:
;   sum        = lo([r4]) + hi([r4]) + lo([r5]) + hi([r5]) + word [r2]
;   old        = word [r1]
;   word [r1]  = sum
;   word [r3]  = sum - old
; where lo()/hi() are the low/high byte of the word at that lane.
;
; r0 is advanced by 16 and the loop repeats while it is still negative after
; the add, so at least one iteration always runs. [r1+r0] and [r3+r0] are
; accessed with mova, and [r2+r0] is used as a direct memory operand.
; Clobbers: m0-m3, m7.
;------------------------------------------------------------------------------
%macro BLUR_LINE 1
cglobal gradfun_blur_line_%1, 6, 6, 8
    mova        m7, [pw_ff]         ; low-byte mask for every word
.next16:
    %1          m0, [r4+r0]
    %1          m1, [r5+r0]

    ; Low bytes of each word: start from the mask and AND the row into it.
    mova        m2, m7
    mova        m3, m7
    pand        m2, m0
    pand        m3, m1
    ; High bytes of each word.
    psrlw       m0, 8
    psrlw       m1, 8

    ; Sum the four bytes belonging to this lane.
    paddw       m2, m3
    paddw       m0, m1
    paddw       m0, m2

    ; Fold in [r2], then swap the new sum with the stored one at [r1] and
    ; write the difference to [r3].
    mova        m1, [r1+r0]
    paddw       m0, [r2+r0]
    mova   [r1+r0], m0
    psubw       m0, m1
    mova   [r3+r0], m0

    add         r0, 16
    jl .next16
    REP_RET
%endmacro

; Two sse2 variants: one reading the rows with movdqa, one with movdqu.
INIT_XMM sse2
BLUR_LINE movdqa
BLUR_LINE movdqu
111