/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "regdef.h"

/* Some nicer register names.  */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value */
#define te a5
#define tf a4
#define tg a3
#define th v0

        .set noat
        .set noreorder
        .arch pca56
        .text

/*****************************************************************************
 * int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2,
 *                          int line_size, int h)
 *
 * Register usage: a1 = pix1, a2 = pix2, a3 = line_size, a4 = h; the first
 * argument is not read, and a0 is reused as a scratch register below.
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
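/*
 * For reference, a rough C sketch of what this routine computes: the sum
 * of absolute differences of a 16-pixel-wide block over h lines, two
 * quadwords per line (perr accumulates the byte-wise absolute differences
 * of two 64-bit words).  This is only an illustration, not part of the
 * build:
 *
 *     int sad = 0;
 *     for (int y = 0; y < h; y++) {
 *         for (int x = 0; x < 16; x++)
 *             sad += abs(pix1[x] - pix2[x]);
 *         pix1 += line_size;
 *         pix2 += line_size;
 *     }
 *     return sad;
 */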
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        .frame sp, 0, ra, 0
        .prologue 0

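/* When gprof instrumentation is enabled, call _mcount on entry so this
   function shows up in the profile. */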
#if CONFIG_GPROF
        lda     AT, _mcount
        jsr     AT, (AT), _mcount
#endif

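        /* pix2 (a2) may be arbitrarily aligned, so take the fast path only
           when its low three bits are clear.  pix1 (a1) is loaded with plain
           ldq throughout, i.e. it is assumed to be 8-byte aligned. */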
        and     a2, 7, t0
        clr     v0
        beq     t0, $aligned
        .align 4
$unaligned:
        /* Registers:
           line 0:
           t0:  left_u -> left lo -> left
           t1:  mid
           t2:  right_u -> right hi -> right
           t3:  ref left
           t4:  ref right
           line 1:
           t5:  left_u -> left lo -> left
           t6:  mid
           t7:  right_u -> right hi -> right
           t8:  ref left
           t9:  ref right
           temp:
           ta:  left hi
           tb:  right lo
           tc:  error left
           td:  error right  */

        /* load line 0 */
        ldq_u   t0, 0(a2)       # left_u
        ldq_u   t1, 8(a2)       # mid
        ldq_u   t2, 16(a2)      # right_u
        ldq     t3, 0(a1)       # ref left
        ldq     t4, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        addq    a2, a3, a2      # pix2
        /* load line 1 */
        ldq_u   t5, 0(a2)       # left_u
        ldq_u   t6, 8(a2)       # mid
        ldq_u   t7, 16(a2)      # right_u
        ldq     t8, 0(a1)       # ref left
        ldq     t9, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        addq    a2, a3, a2      # pix2
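        /* Each 16-byte row of pix2 spans up to three aligned quadwords
           (left_u, mid, right_u).  extql/extqh shift the loaded quadwords
           according to the low bits of a2 so that or-ing the pieces
           reconstructs the two 64-bit halves of the row; perr then
           accumulates the byte-wise absolute differences against the
           aligned reference row.  a2 has already been advanced at this
           point, so the shift counts only match the load addresses when
           line_size keeps a2's low three bits unchanged (i.e. is a
           multiple of 8). */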
        /* calc line 0 */
        extql   t0, a2, t0      # left lo
        extqh   t1, a2, ta      # left hi
        extql   t1, a2, tb      # right lo
        or      t0, ta, t0      # left
        extqh   t2, a2, t2      # right hi
        perr    t3, t0, tc      # error left
        or      t2, tb, t2      # right
        perr    t4, t2, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* calc line 1 */
        extql   t5, a2, t5      # left lo
        extqh   t6, a2, ta      # left hi
        extql   t6, a2, tb      # right lo
        or      t5, ta, t5      # left
        extqh   t7, a2, t7      # right hi
        perr    t8, t5, tc      # error left
        or      t7, tb, t7      # right
        perr    t9, t7, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* loop */
        subq    a4,  2, a4      # h -= 2
        bne     a4, $unaligned
        ret

        .align 4
$aligned:
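        /* Aligned fast path: each iteration loads four rows of both blocks
           well ahead of the perr/addq chain below, following the load-use
           distance rule noted at the top of the file. */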
        /* load line 0 */
        ldq     t0, 0(a2)       # left
        ldq     t1, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     t2, 0(a1)       # ref left
        ldq     t3, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 1 */
        ldq     t4, 0(a2)       # left
        ldq     t5, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     t6, 0(a1)       # ref left
        ldq     t7, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 2 */
        ldq     t8, 0(a2)       # left
        ldq     t9, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     ta, 0(a1)       # ref left
        ldq     tb, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 3 */
        ldq     tc, 0(a2)       # left
        ldq     td, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     te, 0(a1)       # ref left
        ldq     a0, 8(a1)       # ref right
        /* calc line 0 */
        perr    t0, t2, t0      # error left
        addq    a1, a3, a1      # pix1
        perr    t1, t3, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 1 */
        perr    t4, t6, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t5, t7, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 2 */
        perr    t8, ta, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t9, tb, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 3 */
        perr    tc, te, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    td, a0, t1      # error right
        addq    v0, t0, v0      # add error left
        addq    v0, t1, v0      # add error right
        /* loop */
        subq    a4,  4, a4      # h -= 4
        bne     a4, $aligned
        ret
        .end pix_abs16x16_mvi_asm