/*
 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

/*
 * call_2x_pixels type, subp
 *
 * Emits the exported function ff_<type>_pixels16<subp>_armv6. The generated
 * function runs ff_<type>_pixels8<subp>_armv6 twice: once with the arguments
 * exactly as received, and once with r0 and r1 both advanced by 8.
 *
 * r0-r3 and lr are saved around the first call and restored afterwards, so
 * the second invocation starts from the original r2/r3 and returns straight
 * to the original caller (it is entered with a plain branch, not bl).
 *
 * NOTE(review): function/endfunc and X() come from the included asm.S; X()
 * is presumably the platform symbol-name decoration - confirm there.
 */
.macro  call_2x_pixels  type, subp
function ff_\type\()_pixels16\subp\()_armv6, export=1
        push            {r0-r3, lr}
        bl              X(ff_\type\()_pixels8\subp\()_armv6)
        pop             {r0-r3, lr}
        add             r0,  r0,  #8
        add             r1,  r1,  #8
        b               X(ff_\type\()_pixels8\subp\()_armv6)
endfunc
.endm

/*
 * Instantiate the 16-wide wrappers. Each line defines
 * ff_<type>_pixels16<subp>_armv6 on top of the matching 8-wide routine,
 * i.e. ff_avg_pixels16_armv6, ff_put_pixels16_x2_armv6,
 * ff_put_pixels16_y2_armv6, ff_put_pixels16_x2_no_rnd_armv6 and
 * ff_put_pixels16_y2_no_rnd_armv6. The plain ff_put_pixels16_armv6 is not
 * generated here; this file gives it a dedicated implementation.
 */
call_2x_pixels          avg
call_2x_pixels          put, _x2
call_2x_pixels          put, _y2
call_2x_pixels          put, _x2_no_rnd
call_2x_pixels          put, _y2_no_rnd

/*
 * ff_put_pixels16_armv6
 *
 * Copies rows of 16 bytes from the address in r1 to the address in r0,
 * two rows per loop iteration.
 *
 *   r0  store pointer, stepped by r2 after each row
 *   r1  load pointer, stepped by r2 after each row
 *   r2  byte step between consecutive rows (same for both pointers)
 *   r3  row count; reduced by 2 per iteration and the loop only exits when
 *       it reaches exactly zero, so it has to be even and non-zero
 *
 * Clobbers the flags; r4-r11 are saved and restored.
 *
 * NOTE(review): the register roles look like the hpeldsp
 * (block, pixels, line_size, h) prototype - confirm against the C
 * declaration. ldr_post/strd_post are macros from asm.S; from their use
 * here they are taken to be post-indexed accesses that add the last operand
 * to the base register - confirm there.
 */
function ff_put_pixels16_armv6, export=1
        push            {r4-r11}
1:
        ldr             r5,  [r1, #4]                   @ row 0, bytes 4-7
        ldr             r6,  [r1, #8]                   @ row 0, bytes 8-11
        ldr             r7,  [r1, #12]                  @ row 0, bytes 12-15
        ldr_post        r4,  r1,  r2                    @ row 0, bytes 0-3; r1 moves to row 1
        strd            r6,  r7,  [r0, #8]              @ upper half of row 0
        ldr             r9,  [r1, #4]                   @ row 1, bytes 4-7
        strd_post       r4,  r5,  r0,  r2               @ lower half of row 0; r0 moves to row 1
        ldr             r10, [r1, #8]                   @ row 1, bytes 8-11
        ldr             r11, [r1, #12]                  @ row 1, bytes 12-15
        ldr_post        r8,  r1,  r2                    @ row 1, bytes 0-3; r1 moves on
        strd            r10, r11, [r0, #8]              @ upper half of row 1
        subs            r3,  r3,  #2                    @ two rows done
        strd_post       r8,  r9,  r0,  r2               @ lower half of row 1; r0 moves on
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc

/*
 * ff_put_pixels8_armv6
 *
 * Copies rows of 8 bytes from the address in r1 to the address in r0,
 * two rows per loop iteration.
 *
 *   r0  store pointer, stepped by r2 after each row
 *   r1  load pointer, stepped by r2 after each row
 *   r2  byte step between consecutive rows (same for both pointers)
 *   r3  row count; reduced by 2 per iteration and the loop only exits when
 *       it reaches exactly zero, so it has to be even and non-zero
 *
 * Clobbers the flags; r4-r7 are saved and restored.
 *
 * NOTE(review): ldr_post/strd_post are macros from asm.S, taken here to be
 * post-indexed accesses with base write-back - confirm there.
 */
function ff_put_pixels8_armv6, export=1
        push            {r4-r7}
1:
        ldr             r5,  [r1, #4]                   @ row 0, bytes 4-7
        ldr_post        r4,  r1,  r2                    @ row 0, bytes 0-3; r1 moves to row 1
        ldr             r7,  [r1, #4]                   @ row 1, bytes 4-7
        strd_post       r4,  r5,  r0,  r2               @ write row 0; r0 moves to row 1
        ldr_post        r6,  r1,  r2                    @ row 1, bytes 0-3; r1 moves on
        subs            r3,  r3,  #2                    @ two rows done
        strd_post       r6,  r7,  r0,  r2               @ write row 1; r0 moves on
        bne             1b

        pop             {r4-r7}
        bx              lr
endfunc

/*
 * ff_put_pixels8_x2_armv6
 *
 * For each row, writes 8 bytes to r0 where output byte i is the rounded
 * average (a + b + 1) >> 1 of input bytes i and i + 1 read from r1.
 * Nine input bytes are therefore read per row. Two rows per iteration.
 *
 *   r0  store pointer, stepped by r2 after each row
 *   r1  load pointer, stepped by r2 after each row
 *   r2  byte step between consecutive rows (same for both pointers)
 *   r3  row count; must be even and non-zero (loop exits only on exactly 0)
 *
 * Rounding: uhadd8 yields the per-byte truncated average (a + b) >> 1.
 * (a ^ b) & 0x01 is the bit that truncation dropped, and uadd8 adds it
 * back per byte, giving (a + b + 1) >> 1 without carries between lanes.
 *
 * The shifted-by-one word for bytes 1-4 is built from the two aligned-offset
 * words with lsr/orr, which assumes little-endian byte order; bytes 5-8 are
 * fetched with a word load at offset 5.
 *
 * Clobbers r12 and the flags. r14 is used as scratch, which is why lr is
 * pushed and the function returns by popping into pc.
 *
 * NOTE(review): ldr_pre/strd_post are macros from asm.S, taken here to be
 * pre-/post-indexed accesses with base write-back - confirm there.
 */
function ff_put_pixels8_x2_armv6, export=1
        push            {r4-r11, lr}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16          @ r12 = 0x01010101, per-byte low-bit mask
1:
        ldr             r4,  [r1]                       @ row A, bytes 0-3
        subs            r3,  r3,  #2                    @ sets Z for the bne at the bottom of the loop
        ldr             r5,  [r1, #4]                   @ row A, bytes 4-7
        ldr             r7,  [r1, #5]                   @ row A, bytes 5-8
        lsr             r6,  r4,  #8
        ldr_pre         r8,  r1,  r2                    @ row B, bytes 0-3; r1 now at row B
        orr             r6,  r6,  r5,  lsl #24          @ r6 = row A, bytes 1-4
        ldr             r9,  [r1, #4]                   @ row B, bytes 4-7
        ldr             r11, [r1, #5]                   @ row B, bytes 5-8
        lsr             r10, r8,  #8
        add             r1,  r1,  r2                    @ r1 now at the row after B
        orr             r10, r10, r9,  lsl #24          @ r10 = row B, bytes 1-4
        eor             r14, r4,  r6
        uhadd8          r4,  r4,  r6                    @ row A low word, truncated average
        eor             r6,  r5,  r7
        uhadd8          r5,  r5,  r7                    @ row A high word, truncated average
        and             r14, r14, r12                   @ rounding bits, row A low word
        and             r6,  r6,  r12                   @ rounding bits, row A high word
        uadd8           r4,  r4,  r14
        eor             r14, r8,  r10
        uadd8           r5,  r5,  r6
        eor             r6,  r9,  r11
        uhadd8          r8,  r8,  r10                   @ row B low word, truncated average
        and             r14, r14, r12                   @ rounding bits, row B low word
        uhadd8          r9,  r9,  r11                   @ row B high word, truncated average
        and             r6,  r6,  r12                   @ rounding bits, row B high word
        uadd8           r8,  r8,  r14
        strd_post       r4,  r5,  r0,  r2               @ write row A result
        uadd8           r9,  r9,  r6
        strd_post       r8,  r9,  r0,  r2               @ write row B result
        bne             1b

        pop             {r4-r11, pc}
endfunc

/*
 * ff_put_pixels8_y2_armv6
 *
 * For each output row, writes 8 bytes to r0 where every byte is the rounded
 * average (a + b + 1) >> 1 of the bytes at the same column in two
 * consecutive input rows read from r1. Two output rows per iteration.
 *
 *   r0  store pointer, stepped by r2 after each row
 *   r1  load pointer, stepped by r2 for each row fetched
 *   r2  byte step between consecutive rows (same for both pointers)
 *   r3  row count; must be even and non-zero (loop exits only on exactly 0)
 *
 * Two input rows are loaded before the loop. Each iteration then fetches one
 * more row unconditionally and a second one only while rows remain, so in
 * total one input row more than the output row count is read.
 *
 * Rounding: uhadd8 yields the per-byte truncated average; (a ^ b) & 0x01 is
 * the dropped bit and uadd8 adds it back per byte.
 *
 * Clobbers r12 and the flags; r4-r11 are saved and restored.
 *
 * NOTE(review): ldr_pre/ldrc_pre/strd_post are macros from asm.S, taken here
 * to be (conditional) pre-indexed and post-indexed accesses with base
 * write-back - confirm there.
 */
function ff_put_pixels8_y2_armv6, export=1
        push            {r4-r11}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16          @ r12 = 0x01010101, per-byte low-bit mask
        ldr             r4,  [r1]                       @ row 0, bytes 0-3
        ldr             r5,  [r1, #4]                   @ row 0, bytes 4-7
        ldr_pre         r6,  r1,  r2                    @ row 1, bytes 0-3; r1 now at row 1
        ldr             r7,  [r1, #4]                   @ row 1, bytes 4-7
1:
        subs            r3,  r3,  #2                    @ sets Z for the conditional loads and bne below
        uhadd8          r8,  r4,  r6                    @ first output row, low word, truncated
        eor             r10, r4,  r6
        uhadd8          r9,  r5,  r7                    @ first output row, high word, truncated
        eor             r11, r5,  r7
        and             r10, r10, r12                   @ rounding bits, low word
        ldr_pre         r4,  r1,  r2                    @ next input row, bytes 0-3
        uadd8           r8,  r8,  r10
        and             r11, r11, r12                   @ rounding bits, high word
        uadd8           r9,  r9,  r11
        ldr             r5,  [r1, #4]                   @ next input row, bytes 4-7
        uhadd8          r10, r4,  r6                    @ second output row, low word, truncated
        eor             r6,  r4,  r6
        uhadd8          r11, r5,  r7                    @ second output row, high word, truncated
        and             r6,  r6,  r12                   @ rounding bits, low word
        eor             r7,  r5,  r7
        uadd8           r10, r10, r6
        and             r7,  r7,  r12                   @ rounding bits, high word
        ldrc_pre        ne,  r6,  r1,  r2               @ only while rows remain: following row, bytes 0-3
        uadd8           r11, r11, r7
        strd_post       r8,  r9,  r0,  r2               @ write first output row
        it              ne
        ldrne           r7,  [r1, #4]                   @ only while rows remain: following row, bytes 4-7
        strd_post       r10, r11, r0,  r2               @ write second output row
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc

/*
 * ff_put_pixels8_x2_no_rnd_armv6
 *
 * For each row, writes 8 bytes to r0 where output byte i is the truncated
 * average (a + b) >> 1 of input bytes i and i + 1 read from r1 (uhadd8 with
 * no rounding correction). Nine input bytes are read per row. Two rows per
 * iteration.
 *
 *   r0  store pointer, stepped by r2 after each row
 *   r1  load pointer, stepped by r2 after each row
 *   r2  byte step between consecutive rows (same for both pointers)
 *   r3  row count; must be even and non-zero (loop exits only on exactly 0)
 *
 * The shifted-by-one word for bytes 1-4 is built with lsr/orr, which assumes
 * little-endian byte order; bytes 5-8 come from a word load at offset 5.
 *
 * Clobbers r12 and the flags. r14 is used as scratch, which is why lr is
 * pushed and the function returns by popping into pc.
 *
 * NOTE(review): ldr_pre is a macro from asm.S, taken here to be a
 * pre-indexed load with base write-back - confirm there.
 */
function ff_put_pixels8_x2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
1:
        subs            r3,  r3,  #2                    @ sets Z for the bne at the bottom of the loop
        ldr             r4,  [r1]                       @ row A, bytes 0-3
        ldr             r5,  [r1, #4]                   @ row A, bytes 4-7
        ldr             r7,  [r1, #5]                   @ row A, bytes 5-8
        ldr_pre         r8,  r1,  r2                    @ row B, bytes 0-3; r1 now at row B
        ldr             r9,  [r1, #4]                   @ row B, bytes 4-7
        ldr             r14, [r1, #5]                   @ row B, bytes 5-8
        add             r1,  r1,  r2                    @ r1 now at the row after B
        lsr             r6,  r4,  #8
        orr             r6,  r6,  r5,  lsl #24          @ r6 = row A, bytes 1-4
        lsr             r12, r8,  #8
        orr             r12, r12, r9,  lsl #24          @ r12 = row B, bytes 1-4
        uhadd8          r4,  r4,  r6                    @ row A result, low word
        uhadd8          r5,  r5,  r7                    @ row A result, high word
        uhadd8          r8,  r8,  r12                   @ row B result, low word
        uhadd8          r9,  r9,  r14                   @ row B result, high word
        stm             r0,  {r4,r5}                    @ write row A result
        add             r0,  r0,  r2
        stm             r0,  {r8,r9}                    @ write row B result
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc

/*
 * ff_put_pixels8_y2_no_rnd_armv6
 *
 * For each output row, writes 8 bytes to r0 where every byte is the
 * truncated average (a + b) >> 1 of the bytes at the same column in two
 * consecutive input rows read from r1 (uhadd8 with no rounding correction).
 * Two output rows per iteration.
 *
 *   r0  store pointer, stepped by r2 after each row
 *   r1  load pointer, stepped by r2 for each row fetched
 *   r2  byte step between consecutive rows (same for both pointers)
 *   r3  row count; must be even and non-zero (loop exits only on exactly 0)
 *
 * Two input rows are loaded before the loop. Each iteration then fetches one
 * more row unconditionally and a second one only while rows remain, so in
 * total one input row more than the output row count is read.
 *
 * Clobbers r12 and the flags. r14 is used as scratch, which is why lr is
 * pushed and the function returns by popping into pc.
 *
 * NOTE(review): ldr_pre/ldrc_pre are macros from asm.S, taken here to be
 * (conditional) pre-indexed loads with base write-back - confirm there.
 */
function ff_put_pixels8_y2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
        ldr             r4,  [r1]                       @ row 0, bytes 0-3
        ldr             r5,  [r1, #4]                   @ row 0, bytes 4-7
        ldr_pre         r6,  r1,  r2                    @ row 1, bytes 0-3; r1 now at row 1
        ldr             r7,  [r1, #4]                   @ row 1, bytes 4-7
1:
        subs            r3,  r3,  #2                    @ sets Z for the conditional loads and bne below
        uhadd8          r8,  r4,  r6                    @ first output row, low word
        ldr_pre         r4,  r1,  r2                    @ next input row, bytes 0-3
        uhadd8          r9,  r5,  r7                    @ first output row, high word
        ldr             r5,  [r1, #4]                   @ next input row, bytes 4-7
        uhadd8          r12, r4,  r6                    @ second output row, low word
        ldrc_pre        ne,  r6,  r1,  r2               @ only while rows remain: following row, bytes 0-3
        uhadd8          r14, r5,  r7                    @ second output row, high word
        it              ne
        ldrne           r7,  [r1, #4]                   @ only while rows remain: following row, bytes 4-7
        stm             r0,  {r8,r9}                    @ write first output row
        add             r0,  r0,  r2
        stm             r0,  {r12,r14}                  @ write second output row
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc

/*
 * ff_avg_pixels8_armv6
 *
 * For each row, reads 8 bytes at r0 and 8 bytes at r1, and writes back to r0
 * the per-byte rounded average (a + b + 1) >> 1 of the two.
 *
 *   r0  read/write pointer, stepped by r2 after each row
 *   r1  load pointer, stepped by r2 after each row
 *   r2  byte step between consecutive rows (same for both pointers)
 *   r3  row count; must be even and non-zero (the exit test only fires when
 *       the count reaches exactly zero)
 *
 * The loop is software-pipelined: the first pair of rows is loaded before
 * label 1, and each pass loads the operands of the next row while finishing
 * the current one. The row counter is decremented ahead of time (before the
 * loop and again in the second half), and the beq in the middle of the pass
 * leaves for the tail at label 2 once no further rows need to be loaded.
 * None of the instructions between a subs and that beq update the Z flag.
 *
 * Rounding: uhadd8 yields the per-byte truncated average; (a ^ b) & 0x01 is
 * the dropped bit and uadd8 adds it back per byte.
 *
 * pld issues prefetch hints for upcoming rows at r1.
 *
 * Clobbers r12 and the flags. lr holds the 0x01010101 mask, which is why it
 * is pushed and the function returns by popping into pc.
 *
 * NOTE(review): ldr_post/strd_post/ldrd_reg are macros from asm.S. From
 * their use here ldr_post/strd_post are taken to be post-indexed with base
 * write-back, and ldrd_reg to be a register-offset ldrd that leaves r0
 * unchanged - confirm there.
 */
function ff_avg_pixels8_armv6, export=1
        pld             [r1, r2]
        push            {r4-r10, lr}
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16          @ lr = 0x01010101, per-byte low-bit mask
        ldrd            r4,  r5,  [r0]                  @ row n as currently stored at r0
        ldr             r10, [r1, #4]                   @ row n at r1, bytes 4-7
        ldr_post        r9,  r1,  r2                    @ row n at r1, bytes 0-3; r1 moves to row n+1
        subs            r3,  r3,  #2                    @ Z set when this is the last pair of rows
1:
        pld             [r1, r2]
        eor             r8,  r4,  r9
        uhadd8          r4,  r4,  r9                    @ row n, low word, truncated average
        eor             r12, r5,  r10
        ldrd_reg        r6,  r7,  r0,  r2               @ row n+1 as currently stored at r0 + r2
        uhadd8          r5,  r5,  r10                   @ row n, high word, truncated average
        and             r8,  r8,  lr                    @ rounding bits, row n low word
        ldr             r10, [r1, #4]                   @ row n+1 at r1, bytes 4-7
        and             r12, r12, lr                    @ rounding bits, row n high word
        uadd8           r4,  r4,  r8
        ldr_post        r9,  r1,  r2                    @ row n+1 at r1, bytes 0-3; r1 moves to row n+2
        eor             r8,  r6,  r9
        uadd8           r5,  r5,  r12
        pld             [r1, r2,  lsl #1]
        eor             r12, r7,  r10
        uhadd8          r6,  r6,  r9                    @ row n+1, low word, truncated average
        strd_post       r4,  r5,  r0,  r2               @ write row n; r0 moves to row n+1
        uhadd8          r7,  r7,  r10                   @ row n+1, high word, truncated average
        beq             2f                              @ last pair: finish row n+1 without loading more
        and             r8,  r8,  lr                    @ rounding bits, row n+1 low word
        ldrd_reg        r4,  r5,  r0,  r2               @ row n+2 as currently stored at r0 + r2
        uadd8           r6,  r6,  r8
        ldr             r10, [r1, #4]                   @ row n+2 at r1, bytes 4-7
        and             r12, r12, lr                    @ rounding bits, row n+1 high word
        subs            r3,  r3,  #2                    @ Z set when the next pass handles the last pair
        uadd8           r7,  r7,  r12
        ldr_post        r9,  r1,  r2                    @ row n+2 at r1, bytes 0-3; r1 moves on
        strd_post       r6,  r7,  r0,  r2               @ write row n+1; r0 moves to row n+2
        b               1b
2:
        and             r8,  r8,  lr                    @ rounding bits, final row low word
        and             r12, r12, lr                    @ rounding bits, final row high word
        uadd8           r6,  r6,  r8
        uadd8           r7,  r7,  r12
        strd_post       r6,  r7,  r0,  r2               @ write the final row

        pop             {r4-r10, pc}
endfunc
