/*
 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

@ ff_pix_abs16_armv6
@
@ Sum of absolute byte differences between two blocks, 16 bytes per row.
@
@ In:   r0      not read; it is overwritten with the row count on entry
@       r1      first block; read with ldm, which needs a word-aligned address
@       r2      second block; read with single-word ldr
@       r3      byte offset added to both r1 and r2 after each row
@       [sp]    row count (word at the stack pointer on entry; presumably the
@               fifth C argument -- TODO confirm against the C prototype)
@ Out:  r0      total of the absolute differences (r12 + lr)
@ Regs: r4-r9 and lr are saved and restored, r12 is clobbered
@
@ The loop is exited only when the decremented row count is exactly zero,
@ so the row count must be at least 1.
@ The sum is split over two accumulators (r12 and lr), presumably so that
@ back-to-back usada8 instructions do not wait on each other.
function ff_pix_abs16_armv6, export=1
        ldr             r0,  [sp]               @ r0 = rows remaining
        push            {r4-r9, lr}
        mov             r12, #0                 @ accumulator for words 0 and 2
        mov             lr,  #0                 @ accumulator for words 1 and 3
        ldm             r1,  {r4-r7}            @ first 16-byte row from r1
        ldr             r8,  [r2]
1:
        ldr             r9,  [r2, #4]
        pld             [r1, r3]                @ prefetch the next row of r1
        usada8          r12, r4,  r8,  r12
        ldr             r8,  [r2, #8]
        pld             [r2, r3]                @ prefetch the next row of r2
        usada8          lr,  r5,  r9,  lr
        ldr             r9,  [r2, #12]
        usada8          r12, r6,  r8,  r12
        subs            r0,  r0,  #1
        usada8          lr,  r7,  r9,  lr       @ leaves the flags from subs intact
        beq             2f                      @ that was the last row
        add             r1,  r1,  r3
        ldm             r1,  {r4-r7}            @ load the next row early
        add             r2,  r2,  r3
        ldr             r8,  [r2]
        b               1b
2:
        add             r0,  r12, lr            @ merge the two partial sums
        pop             {r4-r9, pc}
endfunc

@ ff_pix_abs16_x2_armv6
@
@ Sum of absolute byte differences, 16 bytes per row, between the block at
@ r1 and a horizontally interpolated version of the block at r2: byte i of
@ the second operand is (ref[i] + ref[i + 1] + 1) >> 1.
@
@ In:   r0      not read; used as the accumulator
@       r1      first block, four word loads per row
@       r2      reference block, 17 bytes are read per row
@               (four word loads plus one ldrb at offset 16)
@       r3      byte offset added to both r1 and r2 after each row
@       [sp]    row count (word at the stack pointer on entry; presumably the
@               fifth C argument -- TODO confirm against the C prototype)
@ Out:  r0      total of the absolute differences
@ Regs: r4-r11 and lr are saved and restored, r12 is clobbered
@
@ The "reference shifted by one byte" words are assembled with
@ lsr 8 / orr lsl 24, which matches a little-endian byte order.
@ The rounded average is formed per byte as
@       uhadd8(a, b) + ((a ^ b) & 1)
@ because uhadd8 alone truncates; lr holds the 0x01010101 mask.
@ The loop tests the counter at the bottom, so at least one row is
@ always processed.
function ff_pix_abs16_x2_armv6, export=1
        ldr             r12, [sp]               @ r12 = rows remaining
        push            {r4-r11, lr}
        mov             r0,  #0
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16  @ lr = 0x01010101
1:
        ldr             r8,  [r2]               @ ref[0..3]
        ldr             r9,  [r2, #4]           @ ref[4..7]
        lsr             r10, r8,  #8
        ldr             r4,  [r1]
        lsr             r6,  r9,  #8
        orr             r10, r10, r9,  lsl #24  @ ref[1..4]
        ldr             r5,  [r2, #8]           @ ref[8..11]
        eor             r11, r8,  r10
        uhadd8          r7,  r8,  r10
        orr             r6,  r6,  r5,  lsl #24  @ ref[5..8]
        and             r11, r11, lr            @ rounding bit per byte
        uadd8           r7,  r7,  r11           @ rounded average, bytes 0..3
        ldr             r8,  [r1, #4]
        usada8          r0,  r4,  r7,  r0
        eor             r7,  r9,  r6
        lsr             r10, r5,  #8
        and             r7,  r7,  lr
        uhadd8          r4,  r9,  r6
        ldr             r6,  [r2, #12]          @ ref[12..15]
        uadd8           r4,  r4,  r7            @ rounded average, bytes 4..7
        pld             [r1, r3]
        orr             r10, r10, r6,  lsl #24  @ ref[9..12]
        usada8          r0,  r8,  r4,  r0
        ldr             r4,  [r1, #8]
        eor             r11, r5,  r10
        ldrb            r7,  [r2, #16]          @ ref[16], the 17th byte
        and             r11, r11, lr
        uhadd8          r8,  r5,  r10
        ldr             r5,  [r1, #12]
        uadd8           r8,  r8,  r11           @ rounded average, bytes 8..11
        pld             [r2, r3]
        lsr             r10, r6,  #8
        usada8          r0,  r4,  r8,  r0
        orr             r10, r10, r7,  lsl #24  @ ref[13..16]
        subs            r12, r12, #1            @ flags are consumed by bgt below
        eor             r11, r6,  r10
        add             r1,  r1,  r3
        uhadd8          r9,  r6,  r10
        and             r11, r11, lr
        uadd8           r9,  r9,  r11           @ rounded average, bytes 12..15
        add             r2,  r2,  r3
        usada8          r0,  r5,  r9,  r0
        bgt             1b

        pop             {r4-r11, pc}
endfunc

@ usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
@
@ One 16-byte row of a vertically interpolated SAD.
@
@ On entry p0-p3 hold the four words of the previous row read from r2.
@ The macro loads the next row from r2, forms the rounded per-byte average
@ of the two rows, and adds the absolute differences between that average
@ and the 16 bytes at r1 into r0.
@
@ On exit:
@   n0-n3   hold the row just loaded from r2 (the previous row for the
@           next invocation, which is why callers swap the two groups)
@   p0-p3   are clobbered
@   r1, r2  have each been advanced by r3
@
@ Expects lr = 0x01010101; it is used to isolate the rounding bit that
@ uhadd8 drops: average = uhadd8(a, b) + ((a ^ b) & 1) per byte.
@ n1, n2, n3, p0 and p1 double as scratch before they receive their final
@ values, so the instruction order matters.
.macro  usad_y2         p0,  p1,  p2,  p3,  n0,  n1,  n2,  n3
        ldr             \n0, [r2]
        eor             \n1, \p0, \n0
        uhadd8          \p0, \p0, \n0
        and             \n1, \n1, lr
        ldr             \n2, [r1]
        uadd8           \p0, \p0, \n1
        ldr             \n1, [r2, #4]
        usada8          r0,  \p0, \n2, r0
        pld             [r1,  r3]
        eor             \n3, \p1, \n1
        uhadd8          \p1, \p1, \n1
        and             \n3, \n3, lr
        ldr             \p0, [r1, #4]
        uadd8           \p1, \p1, \n3
        ldr             \n2, [r2, #8]
        usada8          r0,  \p1, \p0, r0
        pld             [r2,  r3]
        eor             \p0, \p2, \n2
        uhadd8          \p2, \p2, \n2
        and             \p0, \p0, lr
        ldr             \p1, [r1, #8]
        uadd8           \p2, \p2, \p0
        ldr             \n3, [r2, #12]
        usada8          r0,  \p2, \p1, r0
        eor             \p1, \p3, \n3
        uhadd8          \p3, \p3, \n3
        and             \p1, \p1, lr
        ldr             \p0,  [r1, #12]
        uadd8           \p3, \p3, \p1
        add             r1,  r1,  r3
        usada8          r0,  \p3, \p0,  r0
        add             r2,  r2,  r3
.endm

@ ff_pix_abs16_y2_armv6
@
@ Sum of absolute byte differences, 16 bytes per row, between the block at
@ r1 and a vertically interpolated version of the block at r2: each row of
@ the second operand is the rounded per-byte average of two consecutive
@ rows of r2.
@
@ In:   r0      not read; used as the accumulator
@       r1      first block, four word loads per row
@       r2      reference block; one row more than the number of processed
@               rows is read from it
@       r3      byte offset added to both r1 and r2 after each row
@       [sp]    row count (word at the stack pointer on entry; presumably the
@               fifth C argument -- TODO confirm against the C prototype)
@ Out:  r0      total of the absolute differences
@ Regs: r4-r11 and lr are saved and restored, r12 is clobbered
@
@ The loop body handles two rows and decrements the counter by 2, testing
@ it at the bottom: at least two rows are processed and an odd count is
@ effectively rounded up. Presumably callers only pass even counts --
@ TODO confirm.
@ The per-row work is done by the usad_y2 macro defined earlier in this
@ file; the two invocations swap the register groups so that the row
@ loaded by one becomes the previous row of the other.
function ff_pix_abs16_y2_armv6, export=1
        pld             [r1]
        pld             [r2]
        ldr             r12, [sp]               @ r12 = rows remaining
        push            {r4-r11, lr}
        mov             r0,  #0
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16  @ lr = 0x01010101
        ldr             r4,  [r2]               @ first reference row in r4-r7
        ldr             r5,  [r2, #4]
        ldr             r6,  [r2, #8]
        ldr             r7,  [r2, #12]
        add             r2,  r2,  r3
1:
        usad_y2         r4,  r5,  r6,  r7,  r8,  r9,  r10, r11
        subs            r12, r12, #2
        usad_y2         r8,  r9,  r10, r11, r4,  r5,  r6,  r7
        bgt             1b

        pop             {r4-r11, pc}
endfunc

@ ff_pix_abs8_armv6
@
@ Sum of absolute byte differences between two blocks, 8 bytes per row.
@
@ In:   r0      not read; used as one of the two accumulators
@       r1      first block, read two words at a time with ldrd_post
@       r2      second block, read with ldr and ldr_post
@       r3      row-to-row byte offset
@       [sp]    row count (word at the stack pointer on entry; presumably the
@               fifth C argument -- TODO confirm against the C prototype)
@ Out:  r0      total of the absolute differences (r0 + lr)
@ Regs: r4-r9 and lr are saved and restored, r12 is clobbered
@
@ ldrd_post and ldr_post are macros from the included asm.S and are not
@ visible here; they are presumably post-indexed loads that advance the
@ base register by the last operand -- TODO confirm.
@
@ The loop is software pipelined over two rows: the row from r1 for the
@ next step is fetched before the current one is accumulated. The counter
@ is decremented by 2 and the loop is left only when it reaches exactly
@ zero, so the row count must be a positive even number.
function ff_pix_abs8_armv6, export=1
        pld             [r2, r3]
        ldr             r12, [sp]               @ r12 = rows remaining
        push            {r4-r9, lr}
        mov             r0,  #0                 @ accumulator for word 0
        mov             lr,  #0                 @ accumulator for word 1
        @ first row of r1 goes to r4, r5
        ldrd_post       r4,  r5,  r1,  r3
1:
        subs            r12, r12, #2            @ flags are consumed by beq below
        ldr             r7,  [r2, #4]
        ldr_post        r6,  r2,  r3
        @ second row of the pair from r1 goes to r8, r9
        ldrd_post       r8,  r9,  r1,  r3
        usada8          r0,  r4,  r6,  r0
        pld             [r2, r3]
        usada8          lr,  r5,  r7,  lr
        ldr             r7,  [r2, #4]
        ldr_post        r6,  r2,  r3
        beq             2f                      @ last pair: skip the look-ahead load
        ldrd_post       r4,  r5,  r1,  r3
        usada8          r0,  r8,  r6,  r0
        pld             [r2, r3]
        usada8          lr,  r9,  r7,  lr
        b               1b
2:
        usada8          r0,  r8,  r6,  r0
        usada8          lr,  r9,  r7,  lr
        add             r0,  r0,  lr            @ merge the two partial sums
        pop             {r4-r9, pc}
endfunc

@ ff_sse16_armv6
@
@ Sum of squared byte differences between two blocks, 16 bytes per row.
@
@ In:   r0      not read; used as the accumulator
@       r1      first block, read with ldrd (the address presumably has to
@               satisfy the ldrd alignment rule of the target -- TODO confirm)
@       r2      second block, read with single-word ldr
@       r3      byte offset added to both r1 and r2 after each row
@       [sp]    row count (word at the stack pointer on entry; presumably the
@               fifth C argument -- TODO confirm against the C prototype)
@ Out:  r0      total of the squared differences
@ Regs: r4-r9 and lr are saved and restored, r12 is clobbered
@
@ For each pair of words, uxtb16 (plain and with ror 8) splits the four
@ bytes into two registers of zero-extended halfwords (bytes 0,2 and
@ bytes 1,3). usub16 forms the halfword differences and smlad adds the
@ squares of both halfwords of a register to r0.
@ The loop tests the counter at the bottom, so at least one row is
@ always processed.
function ff_sse16_armv6, export=1
        ldr             r12, [sp]               @ r12 = rows remaining
        push            {r4-r9, lr}
        mov             r0,  #0
1:
        ldrd            r4,  r5,  [r1]          @ bytes 0..7 of the r1 row
        ldr             r8,  [r2]
        uxtb16          lr,  r4
        uxtb16          r4,  r4,  ror #8
        uxtb16          r9,  r8
        uxtb16          r8,  r8,  ror #8
        ldr             r7,  [r2, #4]
        usub16          lr,  lr,  r9
        usub16          r4,  r4,  r8
        smlad           r0,  lr,  lr,  r0
        uxtb16          r6,  r5
        uxtb16          lr,  r5,  ror #8
        uxtb16          r8,  r7
        uxtb16          r9,  r7,  ror #8
        smlad           r0,  r4,  r4,  r0
        ldrd            r4,  r5,  [r1, #8]      @ bytes 8..15 of the r1 row
        usub16          r6,  r6,  r8
        usub16          r8,  lr,  r9
        ldr             r7,  [r2, #8]
        smlad           r0,  r6,  r6,  r0
        uxtb16          lr,  r4
        uxtb16          r4,  r4,  ror #8
        uxtb16          r9,  r7
        uxtb16          r7,  r7, ror #8
        smlad           r0,  r8,  r8,  r0
        ldr             r8,  [r2, #12]
        usub16          lr,  lr,  r9
        usub16          r4,  r4,  r7
        smlad           r0,  lr,  lr,  r0
        uxtb16          r6,  r5
        uxtb16          r5,  r5,  ror #8
        uxtb16          r9,  r8
        uxtb16          r8,  r8,  ror #8
        smlad           r0,  r4,  r4,  r0
        usub16          r6,  r6,  r9
        usub16          r5,  r5,  r8
        smlad           r0,  r6,  r6,  r0
        add             r1,  r1,  r3
        add             r2,  r2,  r3
        subs            r12, r12, #1            @ flags are consumed by bgt below
        smlad           r0,  r5,  r5,  r0
        bgt             1b

        pop             {r4-r9, pc}
endfunc
