1@
2@ ARMv4-optimized IDCT functions
3@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4@
5@ This file is part of FFmpeg.
6@
7@ FFmpeg is free software; you can redistribute it and/or
8@ modify it under the terms of the GNU Lesser General Public
9@ License as published by the Free Software Foundation; either
10@ version 2.1 of the License, or (at your option) any later version.
11@
12@ FFmpeg is distributed in the hope that it will be useful,
13@ but WITHOUT ANY WARRANTY; without even the implied warranty of
14@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15@ Lesser General Public License for more details.
16@
17@ You should have received a copy of the GNU Lesser General Public
18@ License along with FFmpeg; if not, write to the Free Software
19@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20@
21
22#include "config.h"
23#include "libavutil/arm/asm.S"
24
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
@
@ Adds an 8x8 block of signed 16-bit coefficients to an 8x8 block of
@ unsigned 8-bit pixels and stores the result, saturated to [0, 255],
@ back into dest.
@
@ In:    r0 = block   (read as 8 rows of 8 consecutive halfwords)
@        r1 = dest    (8 rows of 8 bytes, consecutive rows stride bytes apart)
@        r2 = stride  (in bytes)
@ Out:   nothing
@ Clobb: r0, r1, flags (r4-r10 are saved on entry and restored on exit)
@
@ dest is accessed with 32-bit loads/stores and pixel 0 is taken from the
@ low byte of the loaded word, i.e. a little-endian byte order is assumed.
@ NOTE(review): presumably callers guarantee dest and stride keep those
@ word accesses suitably aligned -- confirm against the call sites.
@
@ Saturation: for sum = pixel + coeff, an unsigned compare against 0xFF is
@ true (hi) exactly when sum < 0 or sum > 255.  The replacement value is
@ (~coeff) >> 24 (logical shift), which is 0xFF when coeff >= 0 and 0 when
@ coeff < 0.  This holds for the whole int16_t coefficient range.
function ff_add_pixels_clamped_arm, export=1, align=5
        push            {r4-r10}
        mov             r10, #8                 /* r10 = rows remaining */
1:
        ldr             r4,  [r1]               /* r4 = dest[0..3] */
        /* block[0] and block[1] */
        ldrsh           r5,  [r0]
        ldrsh           r7,  [r0, #2]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r6,  r5            /* r6 = dest[0] + block[0] */
        add             r8,  r7,  r8,  lsr #8   /* r8 = dest[1] + block[1] */
        mvn             r5,  r5                 /* ~coeff: source of the clamp value */
        mvn             r7,  r7
        cmp             r6,  #0xFF              /* unsigned hi <=> outside [0, 255] */
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #0xFF
        it              hi
        movhi           r8,  r7,  lsr #24
        ldrsh           r5,  [r0, #4]           /* block[2], loaded early (moved from [A]) */
        orr             r9,  r6,  r8,  lsl #8   /* r9 = packed pixels 0..1 */
        /* block[2] and block[3] */
        /* [A] */
        ldrsh           r7,  [r0, #6]
        and             r6,  r4,  #0xFF0000
        add             r6,  r5,  r6,  lsr #16  /* r6 = dest[2] + block[2] */
        add             r8,  r7,  r4,  lsr #24  /* r8 = dest[3] + block[3] */
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #0xFF
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #0xFF
        it              hi
        movhi           r8,  r7,  lsr #24
        orr             r9,  r9,  r6,  lsl #16
        ldr             r4,  [r1, #4]           /* r4 = dest[4..7] (moved from [B]) */
        orr             r9,  r9,  r8,  lsl #24
        /* store dest */
        ldrsh           r5,  [r0, #8]           /* block[4], loaded early (moved from [C]) */
        str             r9,  [r1]               /* dest[0..3] = packed pixels 0..3 */

        /* load dest */
        /* [B] */
        /* block[4] and block[5] */
        /* [C] */
        ldrsh           r7,  [r0, #10]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r6,  r5            /* r6 = dest[4] + block[4] */
        add             r8,  r7,  r8,  lsr #8   /* r8 = dest[5] + block[5] */
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #0xFF
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #0xFF
        it              hi
        movhi           r8,  r7,  lsr #24
        ldrsh           r5,  [r0, #12]          /* block[6], loaded early (moved from [D]) */
        orr             r9,  r6,  r8,  lsl #8   /* r9 = packed pixels 4..5 */
        /* block[6] and block[7] */
        /* [D] */
        ldrsh           r7,  [r0, #14]
        and             r6,  r4,  #0xFF0000
        add             r6,  r5,  r6,  lsr #16  /* r6 = dest[6] + block[6] */
        add             r8,  r7,  r4,  lsr #24  /* r8 = dest[7] + block[7] */
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #0xFF
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #0xFF
        it              hi
        movhi           r8,  r7,  lsr #24
        orr             r9,  r9,  r6,  lsl #16
        add             r0,  r0,  #16           /* next block row (moved from [E]) */
        orr             r9,  r9,  r8,  lsl #24
        subs            r10, r10, #1            /* row counter; flags kept for bne (moved from [F]) */
        /* store dest */
        str             r9,  [r1, #4]           /* dest[4..7] = packed pixels 4..7 */

        /* [E] */
        /* [F] */
        add             r1,  r1,  r2            /* dest += stride (does not touch flags) */
        bne             1b

        pop             {r4-r10}
        bx              lr
endfunc
121