/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "regdef.h"

/* Some nicer register names. */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value */
#define te a5
#define tf a4
#define tg a3
#define th v0

        .set noat
        .set noreorder
        .arch pca56
        .text

/*****************************************************************************
 * int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2,
 *                          int line_size, int h)
 *
 * Register use as implemented below: a0 is never read before being
 * clobbered (the first argument is ignored), a1 = pix1 (quad-aligned
 * reference block, loaded with ldq), a2 = pix2 (possibly unaligned
 * candidate block), a3 = line_size, a4 = h (line count).  The sum of
 * absolute differences over the 16-byte-wide block is returned in v0.
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
50 */ 51 .align 4 52 .globl pix_abs16x16_mvi_asm 53 .ent pix_abs16x16_mvi_asm 54pix_abs16x16_mvi_asm: 55 .frame sp, 0, ra, 0 56 .prologue 0 57 58#if CONFIG_GPROF 59 lda AT, _mcount 60 jsr AT, (AT), _mcount 61#endif 62 63 and a2, 7, t0 64 clr v0 65 beq t0, $aligned 66 .align 4 67$unaligned: 68 /* Registers: 69 line 0: 70 t0: left_u -> left lo -> left 71 t1: mid 72 t2: right_u -> right hi -> right 73 t3: ref left 74 t4: ref right 75 line 1: 76 t5: left_u -> left lo -> left 77 t6: mid 78 t7: right_u -> right hi -> right 79 t8: ref left 80 t9: ref right 81 temp: 82 ta: left hi 83 tb: right lo 84 tc: error left 85 td: error right */ 86 87 /* load line 0 */ 88 ldq_u t0, 0(a2) # left_u 89 ldq_u t1, 8(a2) # mid 90 ldq_u t2, 16(a2) # right_u 91 ldq t3, 0(a1) # ref left 92 ldq t4, 8(a1) # ref right 93 addq a1, a3, a1 # pix1 94 addq a2, a3, a2 # pix2 95 /* load line 1 */ 96 ldq_u t5, 0(a2) # left_u 97 ldq_u t6, 8(a2) # mid 98 ldq_u t7, 16(a2) # right_u 99 ldq t8, 0(a1) # ref left 100 ldq t9, 8(a1) # ref right 101 addq a1, a3, a1 # pix1 102 addq a2, a3, a2 # pix2 103 /* calc line 0 */ 104 extql t0, a2, t0 # left lo 105 extqh t1, a2, ta # left hi 106 extql t1, a2, tb # right lo 107 or t0, ta, t0 # left 108 extqh t2, a2, t2 # right hi 109 perr t3, t0, tc # error left 110 or t2, tb, t2 # right 111 perr t4, t2, td # error right 112 addq v0, tc, v0 # add error left 113 addq v0, td, v0 # add error left 114 /* calc line 1 */ 115 extql t5, a2, t5 # left lo 116 extqh t6, a2, ta # left hi 117 extql t6, a2, tb # right lo 118 or t5, ta, t5 # left 119 extqh t7, a2, t7 # right hi 120 perr t8, t5, tc # error left 121 or t7, tb, t7 # right 122 perr t9, t7, td # error right 123 addq v0, tc, v0 # add error left 124 addq v0, td, v0 # add error left 125 /* loop */ 126 subq a4, 2, a4 # h -= 2 127 bne a4, $unaligned 128 ret 129 130 .align 4 131$aligned: 132 /* load line 0 */ 133 ldq t0, 0(a2) # left 134 ldq t1, 8(a2) # right 135 addq a2, a3, a2 # pix2 136 ldq t2, 0(a1) # ref left 137 ldq t3, 8(a1) 
# ref right 138 addq a1, a3, a1 # pix1 139 /* load line 1 */ 140 ldq t4, 0(a2) # left 141 ldq t5, 8(a2) # right 142 addq a2, a3, a2 # pix2 143 ldq t6, 0(a1) # ref left 144 ldq t7, 8(a1) # ref right 145 addq a1, a3, a1 # pix1 146 /* load line 2 */ 147 ldq t8, 0(a2) # left 148 ldq t9, 8(a2) # right 149 addq a2, a3, a2 # pix2 150 ldq ta, 0(a1) # ref left 151 ldq tb, 8(a1) # ref right 152 addq a1, a3, a1 # pix1 153 /* load line 3 */ 154 ldq tc, 0(a2) # left 155 ldq td, 8(a2) # right 156 addq a2, a3, a2 # pix2 157 ldq te, 0(a1) # ref left 158 ldq a0, 8(a1) # ref right 159 /* calc line 0 */ 160 perr t0, t2, t0 # error left 161 addq a1, a3, a1 # pix1 162 perr t1, t3, t1 # error right 163 addq v0, t0, v0 # add error left 164 /* calc line 1 */ 165 perr t4, t6, t0 # error left 166 addq v0, t1, v0 # add error right 167 perr t5, t7, t1 # error right 168 addq v0, t0, v0 # add error left 169 /* calc line 2 */ 170 perr t8, ta, t0 # error left 171 addq v0, t1, v0 # add error right 172 perr t9, tb, t1 # error right 173 addq v0, t0, v0 # add error left 174 /* calc line 3 */ 175 perr tc, te, t0 # error left 176 addq v0, t1, v0 # add error right 177 perr td, a0, t1 # error right 178 addq v0, t0, v0 # add error left 179 addq v0, t1, v0 # add error right 180 /* loop */ 181 subq a4, 4, a4 # h -= 4 182 bne a4, $aligned 183 ret 184 .end pix_abs16x16_mvi_asm 185