@
@ ARMv4 optimized DSP utils
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
@ This file is part of FFmpeg.
@
@ FFmpeg is free software; you can redistribute it and/or
@ modify it under the terms of the GNU Lesser General Public
@ License as published by the Free Software Foundation; either
@ version 2.1 of the License, or (at your option) any later version.
@
@ FFmpeg is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
@ Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public
@ License along with FFmpeg; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@

#include "config.h"
#include "asm.S"

        @ preserve8 is not defined in this file. NOTE(review): presumably a
        @ macro from asm.S that marks the object as keeping the stack 8-byte
        @ aligned - confirm against asm.S.
        preserve8

@ Fallback for builds where HAVE_PLD is zero: define pld as a macro with an
@ empty body, so every pld statement in this file assembles to nothing.
#if !HAVE_PLD
.macro pld reg
.endm
#endif

@ ff_prefetch_arm: issue one pld per step over a strided range.
@   In:   r0 = first address
@         r1 = byte step added to r0 after each pld
@         r2 = number of steps
@   Out:  nothing. r0 and r2 are modified and the flags are clobbered.
@   The counter is decremented before it is tested, so r2 must be nonzero
@   on entry. With r2 == 0 the counter wraps instead of ending the loop.
@ Only assembled when HAVE_ARMV5TE is nonzero.
#if HAVE_ARMV5TE
function ff_prefetch_arm, export=1
        subs    r2, r2, #1              @ sets Z for the bne below
        pld     [r0]
        add     r0, r0, r1              @ plain add, flags stay as set by subs
        bne     ff_prefetch_arm         @ loop back to the entry point
        bx      lr
        .endfunc
#endif

@ ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
@   Combine five source words into four destination words:
@       Rd[i] = (Rn[i] >> (8 * shift)) | (Rn[i + 1] << (32 - 8 * shift))
@   for i = 0..3, using logical shifts. The Rn registers are only read.
@   Every use in this file passes shift = 1, 2 or 3 and destination
@   registers that are distinct from the sources.
@   NOTE(review): on a little-endian target the result is the 16 bytes
@   that start shift bytes into the five loaded words - confirm endianness.
.macro  ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        mov \Rd0, \Rn0, lsr #(\shift * 8)
        mov \Rd1, \Rn1, lsr #(\shift * 8)
        mov \Rd2, \Rn2, lsr #(\shift * 8)
        mov \Rd3, \Rn3, lsr #(\shift * 8)
        orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
        orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
        orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
        orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm
@ ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
@   In-place variant for three words:
@       R0 = (R0 >> (8 * shift)) | (R1 << (32 - 8 * shift))
@       R1 = (R1 >> (8 * shift)) | (R2 << (32 - 8 * shift))
@   R0 is finished before R1 is modified, so the old R1 feeds both results.
@   R2 is only read. Every use in this file passes shift = 1, 2 or 3.
.macro  ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
        mov \R0, \R0, lsr #(\shift * 8)
        orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
        mov \R1, \R1, lsr #(\shift * 8)
        orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm
@ ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
@   Separate-destination variant for three words:
@       Rdst0 = (Rsrc0 >> (8 * shift)) | (Rsrc1 << (32 - 8 * shift))
@       Rdst1 = (Rsrc1 >> (8 * shift)) | (Rsrc2 << (32 - 8 * shift))
@   Rsrc0 is read only by the first instruction, so Rdst1 may be the same
@   register as Rsrc0 (RND_XY2_IT relies on this). Rsrc1 and Rsrc2 are read
@   after both mov instructions and must not be a destination.
.macro  ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
        mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
        orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
        orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm

@ RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@   Byte-wise average with rounding up of two 8-byte groups held in
@   register pairs: every byte of Rd becomes (n + m + 1) >> 1 for the
@   matching bytes n of Rn and m of Rm.
@   Rmask must hold 0xFEFEFEFE. Masking the xor result clears bit 0 of
@   each byte so the shift right cannot move a bit into the byte below.
@   Rn0 and Rn1 are overwritten, Rm0 and Rm1 are only read.
.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
        @ Rmask = 0xFEFEFEFE
        @ Rn = destroy
        eor \Rd0, \Rn0, \Rm0
        eor \Rd1, \Rn1, \Rm1
        orr \Rn0, \Rn0, \Rm0
        orr \Rn1, \Rn1, \Rm1
        and \Rd0, \Rd0, \Rmask
        and \Rd1, \Rd1, \Rmask
        sub \Rd0, \Rn0, \Rd0, lsr #1
        sub \Rd1, \Rn1, \Rd1, lsr #1
.endm

@ NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@   Byte-wise average with rounding down of two 8-byte groups held in
@   register pairs: every byte of Rd becomes (n + m) >> 1 for the matching
@   bytes n of Rn and m of Rm.
@   Rmask must hold 0xFEFEFEFE. Masking the xor result clears bit 0 of
@   each byte so the shift right cannot move a bit into the byte below.
@   Rn0 and Rn1 are overwritten, Rm0 and Rm1 are only read.
.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
        @ Rmask = 0xFEFEFEFE
        @ Rn = destroy
        eor \Rd0, \Rn0, \Rm0
        eor \Rd1, \Rn1, \Rm1
        and \Rn0, \Rn0, \Rm0
        and \Rn1, \Rn1, \Rm1
        and \Rd0, \Rd0, \Rmask
        and \Rd1, \Rd1, \Rmask
        add \Rd0, \Rn0, \Rd0, lsr #1
        add \Rd1, \Rn1, \Rd1, lsr #1
.endm

@ JMP_ALIGN tmp, reg
@   Dispatch on the low two bits of reg. reg is rounded down to a multiple
@   of 4, then control goes to the next local label
@       1: when the low bits were 0      2: when they were 1
@       3: when they were 2              4: when they were 3
@   The code following each use must therefore define labels 1 to 4.
@   Clobbers tmp and the flags.
.macro  JMP_ALIGN tmp, reg
        ands \tmp, \reg, #3
        bic  \reg, \reg, #3             @ no s suffix: flags still from ands
        beq  1f
        subs \tmp, \tmp, #1
        beq  2f
        subs \tmp, \tmp, #1
        beq  3f
        b    4f
.endm

@ ----------------------------------------------------------------
@ put_pixels16_arm: copy h rows of 16 bytes from pixels to block.
@   In:  r0 = block, r1 = pixels, r2 = line_size, r3 = h
@        (first four arguments of the prototype quoted below)
@   Both pointers advance by line_size after every row.
@   Saves r4-r11 and lr, uses r12 as scratch, returns by popping pc.
@   The row counter is decremented before it is tested, so h must be
@   nonzero.
@   Labels 1 to 4 are the JMP_ALIGN targets for a source that is 0 to 3
@   bytes past word alignment. The paths 2 to 4 load five words (20 bytes)
@   from the rounded-down address and recombine them into four.
        .align 5
function put_pixels16_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r11, lr} @ R14 is also called LR
        JMP_ALIGN r5, r1
1:
        @ source word aligned: straight four-word copy
        ldmia r1, {r4-r7}
        add r1, r1, r2
        stmia r0, {r4-r7}
        pld [r1]
        subs r3, r3, #1
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r11, pc}
        .align 5
2:
        @ source offset 1 byte
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r11, pc}
        .align 5
3:
        @ source offset 2 bytes
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r11, pc}
        .align 5
4:
        @ source offset 3 bytes
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r11,pc}
        .endfunc

@ ----------------------------------------------------------------
@ put_pixels8_arm: copy h rows of 8 bytes from pixels to block.
@   In:  r0 = block, r1 = pixels, r2 = line_size, r3 = h
@        (first four arguments of the prototype quoted below)
@   Both pointers advance by line_size after every row.
@   Saves r4, r5 and lr, uses r12 as scratch, returns by popping pc.
@   The row counter is decremented before it is tested, so h must be
@   nonzero.
@   Labels 1 to 4 are the JMP_ALIGN targets for a source that is 0 to 3
@   bytes past word alignment. The paths 2 to 4 load three words (12 bytes)
@   from the rounded-down address and recombine them into two.
        .align 5
function put_pixels8_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r5,lr} @ R14 is also called LR
        JMP_ALIGN r5, r1
1:
        @ source word aligned: straight two-word copy
        ldmia r1, {r4-r5}
        add r1, r1, r2
        subs r3, r3, #1
        pld [r1]
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r5,pc}
        .align 5
2:
        @ source offset 1 byte
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r5,pc}
        .align 5
3:
        @ source offset 2 bytes
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r5,pc}
        .align 5
4:
        @ source offset 3 bytes
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r5,pc}
        .endfunc

@ ----------------------------------------------------------------
@ put_pixels8_x2_arm: horizontal half-pel interpolation, rounding up.
@   For each of h rows, the 8 output bytes are the RND_AVG32 average of
@   the 8 source bytes and the 8 source bytes one position further on.
@   In:  r0 = block, r1 = pixels, r2 = line_size, r3 = h
@        (first four arguments of the prototype quoted below)
@   Saves r4-r10 and lr, r12 holds the 0xFEFEFEFE mask for RND_AVG32.
@   The row counter is decremented before it is tested, so h must be
@   nonzero.
@   Labels 1 to 4 are the JMP_ALIGN targets for a source that is 0 to 3
@   bytes past word alignment. Every path loads three words (12 bytes)
@   from the rounded-down address.
        .align 5
function put_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:
        @ offset 0: average the loaded words r4,r5 with the shift-1 copy
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r10,pc}
        .align 5
2:
        @ offset 1: average the shift-1 copy with the shift-2 copy
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r10,pc}
        .align 5
3:
        @ offset 2: average the shift-2 copy with the shift-3 copy
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r10,pc}
        .align 5
4:
        @ offset 3: average the shift-3 copy with the second and third
        @ loaded words r5,r10, which need no shifting
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld [r1]
        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
        .endfunc

@ put_no_rnd_pixels8_x2_arm: horizontal half-pel interpolation, rounding
@ down.
@   For each of h rows, the 8 output bytes are the NO_RND_AVG32 average of
@   the 8 source bytes and the 8 source bytes one position further on.
@   In:  r0 = block, r1 = pixels, r2 = line_size, r3 = h
@        (first four arguments of the prototype quoted below)
@   Saves r4-r10 and lr, r12 holds the 0xFEFEFEFE mask for NO_RND_AVG32.
@   The row counter is decremented before it is tested, so h must be
@   nonzero.
@   Labels 1 to 4 are the JMP_ALIGN targets for a source that is 0 to 3
@   bytes past word alignment. Every path loads three words (12 bytes)
@   from the rounded-down address.
        .align 5
function put_no_rnd_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:
        @ offset 0: average the loaded words r4,r5 with the shift-1 copy
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r10,pc}
        .align 5
2:
        @ offset 1: average the shift-1 copy with the shift-2 copy
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r10,pc}
        .align 5
3:
        @ offset 2: average the shift-2 copy with the shift-3 copy
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r10,pc}
        .align 5
4:
        @ offset 3: average the shift-3 copy with the second and third
        @ loaded words r5,r10, which need no shifting
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
        .endfunc


@ ----------------------------------------------------------------
@ put_pixels8_y2_arm: vertical half-pel interpolation, rounding up.
@   Each output row of 8 bytes is the RND_AVG32 average of a source row
@   and the source row below it.
@   In:  r0 = block, r1 = pixels, r2 = line_size, r3 = h
@        (first four arguments of the prototype quoted below)
@   Saves r4-r11 and lr, r12 holds the 0xFEFEFEFE mask for RND_AVG32.
@   r3 is halved on entry because each pass of the 6: loop writes two
@   rows. The first source row is loaded before the loop and every pass
@   loads two more, so 2 * (h >> 1) rows are written and one more than
@   that are read. The pass counter is decremented before it is tested,
@   so h >> 1 must be nonzero.
@   Labels 1 to 4 are the JMP_ALIGN targets for a source that is 0 to 3
@   bytes past word alignment. The paths 2 to 4 load three words per row.
        .align 5
function put_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        mov r3, r3, lsr #1
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:
        @ offset 0. RND_AVG32 destroys its first source pair, so the two
        @ halves of the loop swap which pair carries over to the next row.
        ldmia r1, {r4-r5}
        add r1, r1, r2
6:      ldmia r1, {r6-r7}
        add r1, r1, r2
        pld [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia r1, {r4-r5}
        add r1, r1, r2
        stmia r0, {r8-r9}
        add r0, r0, r2
        pld [r1]
        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
2:
        @ offset 1
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
3:
        @ offset 2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
4:
        @ offset 3
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .endfunc

@ put_no_rnd_pixels8_y2_arm: vertical half-pel interpolation, rounding
@ down.
@   Each output row of 8 bytes is the NO_RND_AVG32 average of a source row
@   and the source row below it.
@   In:  r0 = block, r1 = pixels, r2 = line_size, r3 = h
@        (first four arguments of the prototype quoted below)
@   Saves r4-r11 and lr, r12 holds the 0xFEFEFEFE mask for NO_RND_AVG32.
@   r3 is halved on entry because each pass of the 6: loop writes two
@   rows. The first source row is loaded before the loop and every pass
@   loads two more, so 2 * (h >> 1) rows are written and one more than
@   that are read. The pass counter is decremented before it is tested,
@   so h >> 1 must be nonzero.
@   Labels 1 to 4 are the JMP_ALIGN targets for a source that is 0 to 3
@   bytes past word alignment. The paths 2 to 4 load three words per row.
        .align 5
function put_no_rnd_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        mov r3, r3, lsr #1
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:
        @ offset 0. NO_RND_AVG32 destroys its first source pair, so the two
        @ halves of the loop swap which pair carries over to the next row.
        ldmia r1, {r4-r5}
        add r1, r1, r2
6:      ldmia r1, {r6-r7}
        add r1, r1, r2
        pld [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia r1, {r4-r5}
        add r1, r1, r2
        stmia r0, {r8-r9}
        add r0, r0, r2
        pld [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
2:
        @ offset 1
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
3:
        @ offset 2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
4:
        @ offset 3
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .endfunc

        @ Emit the literal pool here for the ldr rX, =constant loads above.
        .ltorg

@ ----------------------------------------------------------------
@ RND_XY2_IT align, rnd
@   One source row of the 8-pixel xy2 interpolation. Uses fixed registers:
@     In:   r1 = rounded-down source address, r2 = line size,
@           r3 = remaining row counter
@     Out:  r8,  r9  = sum of the low 2 bits of every byte of a and b,
@                      plus a rounding constant on rows where r3 is even
@           r10, r11 = sum of the upper 6 bits (already shifted right by
@                      2) of every byte of a and b
@           r1 advanced by r2, r3 decremented, flags set by that subs
@   Here a is the 8 source bytes at the given alignment and b the 8 bytes
@   one position further on. They are built in r4,r5 and r6,r7.
@   Clobbers r4-r7, r12 and r14.
@   align = 0..3 is the byte offset of the source from word alignment.
@   rnd is the shift mnemonic used to derive the rounding constant from
@   0x03030303: lsl gives 0x02020202, lsr gives 0x01010101.
@   Because r3 drops by one per row, the constant is added on every
@   second row only.
.macro  RND_XY2_IT align, rnd
        @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
.if \align == 0
        ldmia r1, {r6-r8}
.elseif \align == 3
        ldmia r1, {r5-r7}
.else
        ldmia r1, {r8-r10}
.endif
        add r1, r1, r2
        pld [r1]
.if \align == 0
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
.elseif \align == 1
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
.elseif \align == 2
        ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
.elseif \align == 3
        ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
.endif
        ldr r14, =0x03030303
        tst r3, #1
        and r8, r4, r14
        and r9, r5, r14
        and r10, r6, r14
        and r11, r7, r14
        @ eq means r3 is even: turn r14 into the rounding constant
        andeq r14, r14, r14, \rnd #1
        add r8, r8, r10
        add r9, r9, r11
        ldr r12, =0xfcfcfcfc >> 2
        addeq r8, r8, r14
        addeq r9, r9, r14
        and r4, r12, r4, lsr #2
        and r5, r12, r5, lsr #2
        and r6, r12, r6, lsr #2
        and r7, r12, r7, lsr #2
        add r10, r4, r6
        add r11, r5, r7
        subs r3, r3, #1
.endm

@ RND_XY2_EXPAND align, rnd
@   Row loop and function return of the 8-pixel xy2 functions for one
@   source alignment. Each output row combines two consecutive source rows:
@       out = (((l_prev + l_cur) >> 2) & 0x0f0f0f0f) + (h_prev + h_cur)
@   where l is the r8,r9 result and h the r10,r11 result of RND_XY2_IT.
@   The first RND_XY2_IT primes the loop with the first source row. Inside
@   the loop the previous row is parked on the stack, because the next
@   RND_XY2_IT overwrites r4-r11, and is popped back into r4-r7.
@   The bge uses the flags of the subs r3 that ends RND_XY2_IT. No
@   instruction between that subs and the bge sets flags.
@   For r3 = h >= 1 on entry the loop writes h rows and reads h + 1 rows.
@   Ends by popping r4-r11 and pc, so the enclosing function must have
@   pushed r4-r11 and lr.
.macro RND_XY2_EXPAND align, rnd
        RND_XY2_IT \align, \rnd
6:      stmfd sp!, {r8-r11}
        RND_XY2_IT \align, \rnd
        ldmfd sp!, {r4-r7}
        add r4, r4, r8
        add r5, r5, r9
        ldr r14, =0x0f0f0f0f
        add r6, r6, r10
        add r7, r7, r11
        and r4, r14, r4, lsr #2
        and r5, r14, r5, lsr #2
        add r4, r4, r6
        add r5, r5, r7
        stmia r0, {r4-r5}
        add r0, r0, r2
        bge 6b
        ldmfd sp!, {r4-r11,pc}
.endm

@ put_pixels8_xy2_arm: two-dimensional half-pel interpolation with
@ rounding.
@   Each output byte combines a source byte, its right neighbour and the
@   two bytes below them. The rounding constant is 0x02020202, selected by
@   passing lsl to RND_XY2_EXPAND, which holds the whole row loop.
@   In:  r0 = block, r1 = pixels, r2 = line_size, r3 = h
@        (first four arguments of the prototype quoted below)
@   Saves r4-r11 and lr. Each RND_XY2_EXPAND ends with the matching pop
@   of r4-r11 and pc, so every alignment path returns on its own.
@   Labels 1 to 4 are the JMP_ALIGN targets for a source that is 0 to 3
@   bytes past word alignment.
        .align 5
function put_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        JMP_ALIGN r5, r1
1:
        RND_XY2_EXPAND 0, lsl

        .align 5
2:
        RND_XY2_EXPAND 1, lsl

        .align 5
3:
        RND_XY2_EXPAND 2, lsl

        .align 5
4:
        RND_XY2_EXPAND 3, lsl
        .endfunc

@ put_no_rnd_pixels8_xy2_arm: two-dimensional half-pel interpolation with
@ the smaller rounding constant.
@   Each output byte combines a source byte, its right neighbour and the
@   two bytes below them. The rounding constant is 0x01010101, selected by
@   passing lsr to RND_XY2_EXPAND, which holds the whole row loop.
@   In:  r0 = block, r1 = pixels, r2 = line_size, r3 = h
@        (first four arguments of the prototype quoted below)
@   Saves r4-r11 and lr. Each RND_XY2_EXPAND ends with the matching pop
@   of r4-r11 and pc, so every alignment path returns on its own.
@   Labels 1 to 4 are the JMP_ALIGN targets for a source that is 0 to 3
@   bytes past word alignment.
        .align 5
function put_no_rnd_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        JMP_ALIGN r5, r1
1:
        RND_XY2_EXPAND 0, lsr

        .align 5
2:
        RND_XY2_EXPAND 1, lsr

        .align 5
3:
        RND_XY2_EXPAND 2, lsr

        .align 5
4:
        RND_XY2_EXPAND 3, lsr
        .endfunc

@ ff_add_pixels_clamped_ARM: add 8 rows of 8 signed 16-bit coefficients to
@ 8 rows of 8 bytes in place.
@   In:  r0 = block, read in order, 16 bytes per row
@        r1 = dest, 8 bytes per row, read and written as two 32-bit words
@        r2 = stride added to dest after each row
@   Saves and restores r4-r10. r10 counts the 8 rows.
@   Within each word the low byte is paired with the first coefficient.
@   Clamp: after sum = coefficient + pixel, bit 8 of the sum is tested.
@   If it is set, the sum is replaced by the inverted coefficient shifted
@   right logically by 24, which is 0x00 for a negative coefficient and
@   0xFF for a non-negative one.
@   NOTE(review): only bit 8 is examined, so sums outside -256..511 are
@   presumably ruled out by the range of the coefficients - confirm.
@   The bracketed letters mark where instructions were hoisted from, to
@   sit between a load and the first use of its result.
        .align 5
@ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
function ff_add_pixels_clamped_ARM, export=1
        push            {r4-r10}
        mov             r10, #8
1:
        ldr             r4,  [r1]               /* load dest */
        /* block[0] and block[1]*/
        ldrsh           r5,  [r0]
        ldrsh           r7,  [r0, #2]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r5,  r6
        add             r8,  r7,  r8,  lsr #8
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        movne           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #4]           /* moved from [A] */
        orr             r9,  r9,  r8, lsl #8
        /* block[2] and block[3] */
        /* [A] */
        ldrsh           r7,  [r0, #6]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6, lsr #16
        add             r8,  r7,  r8, lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        movne           r8,  r7,  lsr #24
        orr             r9,  r9,  r6, lsl #16
        ldr             r4,  [r1, #4]           /* moved from [B] */
        orr             r9,  r9,  r8, lsl #24
        /* store dest */
        ldrsh           r5,  [r0, #8]           /* moved from [C] */
        str             r9,  [r1]

        /* load dest */
        /* [B] */
        /* block[4] and block[5] */
        /* [C] */
        ldrsh           r7,  [r0, #10]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r5,  r6
        add             r8,  r7,  r8, lsr #8
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        movne           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #12]          /* moved from [D] */
        orr             r9,  r9,  r8, lsl #8
        /* block[6] and block[7] */
        /* [D] */
        ldrsh           r7,  [r0, #14]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6, lsr #16
        add             r8,  r7,  r8, lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        movne           r8,  r7,  lsr #24
        orr             r9,  r9,  r6, lsl #16
        add             r0,  r0,  #16           /* moved from [E] */
        orr             r9,  r9,  r8, lsl #24
        subs            r10, r10, #1            /* moved from [F] */
        /* store dest */
        str             r9,  [r1, #4]

        /* [E] */
        /* [F] */
        add             r1,  r1,  r2
        bne             1b

        pop             {r4-r10}
        bx              lr
        .endfunc
