/*
   C-like prototype :
        void j_rev_dct_ARM(DCTBLOCK data)

   With DCTBLOCK being a pointer to an array of 64 'signed shorts'

   Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)

   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   copies of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
   COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
   IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

*/

/*
   Implementation notes
   --------------------
   Syntax : GNU as, ARM (A32) state, run through the C preprocessor.

   In     : r0 = data, pointer to 64 signed 16-bit coefficients that are
            transformed in place (8 rows of 16 bytes each).
   Out    : nothing in registers; the block at 'data' is overwritten.
   Saved  : r4-r12 and lr are pushed on entry and restored on exit.
   Clobb  : r0-r3 and the condition flags.
   Stack  : 40 bytes of saved registers + 4 bytes holding the data pointer,
            plus 16 bytes pushed temporarily inside each loop iteration.
            The function calls nothing.

   Register roles inside both loops:
        lr      pointer to the current row / column
        r12     remaining iterations (counts 8 down to 0)
        r11     base address of const_array
        r0-r10  scratch

   Pass 1 (row_loop) reads each row with the even terms at byte offsets
   0,2,4,6 (d0,d2,d4,d6) and the odd terms at 8,10,12,14 (d1,d3,d5,d7),
   and stores its results rounded with (x + (1<<10)) >> 11.
   Pass 2 (column_loop) walks the columns with a 16-byte row stride and
   stores its results rounded with (x + (1<<17)) >> 18.

   NOTE(review): every mul/mla below keeps the destination register
   different from the first source operand, presumably to satisfy the
   Rd != Rm restriction of pre-ARMv6 multiplies.  Keep that ordering
   when editing.
*/

#include "asm.S"

@ Fixed-point multipliers; the name gives the real constant, the value is
@ that constant scaled by 2^13 (matching the lsl #13 applied to tmp0/tmp1).
#define FIX_0_298631336 2446
#define FIX_0_541196100 4433
#define FIX_0_765366865 6270
#define FIX_1_175875602 9633
#define FIX_1_501321110 12299
#define FIX_2_053119869 16819
#define FIX_3_072711026 25172
#define FIX_M_0_390180644 -3196
#define FIX_M_0_899976223 -7373
#define FIX_M_1_847759065 -15137
#define FIX_M_1_961570560 -16069
#define FIX_M_2_562915447 -20995
#define FIX_0xFFFF 0xFFFF

@ Byte offsets of the entries above inside const_array (one .word each,
@ in the same order as the table at the end of this file).
#define FIX_0_298631336_ID      0
#define FIX_0_541196100_ID      4
#define FIX_0_765366865_ID      8
#define FIX_1_175875602_ID     12
#define FIX_1_501321110_ID     16
#define FIX_2_053119869_ID     20
#define FIX_3_072711026_ID     24
#define FIX_M_0_390180644_ID   28
#define FIX_M_0_899976223_ID   32
#define FIX_M_1_847759065_ID   36
#define FIX_M_1_961570560_ID   40
#define FIX_M_2_562915447_ID   44
#define FIX_0xFFFF_ID          48

        .text
        .align

function j_rev_dct_ARM, export=1
        stmdb   sp!, { r4 - r12, lr }           @ save r4-r12 and the return address

        sub     sp, sp, #4                      @ reserve one word on the stack
        str     r0, [ sp ]                      @ [sp] = data pointer, reloaded for pass 2

        mov     lr, r0                          @ lr = pointer to the current row
        mov     r12, #8                         @ r12 = row counter
        @ pc reads as "this instruction + 8" in ARM state, hence the -8.
        @ NOTE(review): the offset must be encodable as an ARM immediate, so
        @ changing the number of instructions between here and const_array
        @ may stop this line from assembling.
        add     r11, pc, #(const_array-.-8)     @ r11 = base pointer to the constants array

        @ ---------------------------------------------------------------
        @ Pass 1: one iteration per row (16 bytes)
        @ ---------------------------------------------------------------
row_loop:
        ldrsh   r0, [lr, # 0]                   @ r0 = 'd0'
        ldrsh   r2, [lr, # 2]                   @ r2 = 'd2'

        @ Optimization for row that have all items except the first set to 0
        @ (this works as the DCTELEMS are always 4-byte aligned)
        ldr     r5, [lr, # 0]                   @ r5 = halfwords at offsets 0 and 2
        ldr     r6, [lr, # 4]
        ldr     r3, [lr, # 8]
        ldr     r4, [lr, #12]
        orr     r3, r3, r4
        orr     r3, r3, r6                      @ r3 = OR of the bytes at offsets 4..15
        orrs    r5, r3, r5                      @ Z set when all 16 bytes are zero
        beq     end_of_row_loop                 @ nothing to be done as ALL of them are '0'
        orrs    r3, r3, r2                      @ Z set when only the halfword at offset 0 is non-zero
        beq     empty_row

        ldrsh   r1, [lr, # 8]                   @ r1 = 'd1'
        ldrsh   r4, [lr, # 4]                   @ r4 = 'd4'
        ldrsh   r6, [lr, # 6]                   @ r6 = 'd6'

        @ Even part
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     r7, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     r7, r3, r7                      @ r7 = z1
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, r7                  @ r6 = tmp2
        add     r5, r0, r4                      @ r5 = tmp0
        mla     r2, r3, r2, r7                  @ r2 = tmp3
        sub     r3, r0, r4                      @ r3 = tmp1

        add     r0, r2, r5, lsl #13             @ r0 = tmp10
        rsb     r2, r2, r5, lsl #13             @ r2 = tmp13
        add     r4, r6, r3, lsl #13             @ r4 = tmp11
        rsb     r3, r6, r3, lsl #13             @ r3 = tmp12

        @ Stored in ascending register order: r0, r2, r3, r4
        stmdb   sp!, { r0, r2, r3, r4 }         @ save on the stack tmp10, tmp13, tmp12, tmp11

        @ Odd part
        ldrsh   r3, [lr, #10]                   @ r3 = 'd3'
        ldrsh   r5, [lr, #12]                   @ r5 = 'd5'
        ldrsh   r7, [lr, #14]                   @ r7 = 'd7'

        add     r0, r3, r5                      @ r0 = 'z2'
        add     r2, r1, r7                      @ r2 = 'z1'
        add     r4, r3, r7                      @ r4 = 'z3'
        add     r6, r1, r5                      @ r6 = 'z4'
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6                      @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8                      @ r8 = 'z5'
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2                     @ r2 = 'z1'
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0                      @ r0 = 'z2'
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8                 @ r4 = 'z3'
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8                  @ r6 = 'z4'
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2                 @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0                  @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0                 @ r3 = tmp2 + z2
        add     r7, r7, r4                      @ r7 = tmp0
        mla     r1, r9, r1, r2                  @ r1 = tmp3 + z1
        add     r5, r5, r6                      @ r5 = tmp1
        add     r3, r3, r4                      @ r3 = tmp2
        add     r1, r1, r6                      @ r1 = tmp3

        @ The values come back in a different register set than they were
        @ saved from: the words saved from r3/r4 are reloaded into r4/r6.
        ldmia   sp!, { r0, r2, r4, r6 }         @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
                                                @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
        add     r8, r0, r1
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 0]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
        sub     r8, r0, r1
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #14]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
        add     r8, r6, r3
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 2]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
        sub     r8, r6, r3
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #12]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
        add     r8, r4, r5
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 4]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
        sub     r8, r4, r5
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #10]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
        add     r8, r2, r7
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 6]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
        sub     r8, r2, r7
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 8]

        @ End of row loop
        add     lr, lr, #16                     @ advance to the next row
        subs    r12, r12, #1
        bne     row_loop
        b       start_column_loop               @ all rows done; skip the shortcut paths

        @ Row whose only non-zero halfword is the one at offset 0 (r0):
        @ every output of the row becomes (d0 << 2) truncated to 16 bits.
empty_row:
        ldr     r1, [r11, #FIX_0xFFFF_ID]
        mov     r0, r0, lsl #2
        and     r0, r0, r1                      @ keep the low 16 bits
        add     r0, r0, r0, lsl #16             @ same value in both halfwords
        str     r0, [lr, # 0]
        str     r0, [lr, # 4]
        str     r0, [lr, # 8]
        str     r0, [lr, #12]

end_of_row_loop:
        @ End of loop
        add     lr, lr, #16
        subs    r12, r12, #1
        bne     row_loop

        @ ---------------------------------------------------------------
        @ Pass 2: one iteration per column (elements 16 bytes apart)
        @ ---------------------------------------------------------------
start_column_loop:
        @ Start of column loop
        ldr     lr, [ sp ]                      @ lr = data pointer saved on entry
        mov     r12, #8                         @ r12 = column counter
column_loop:
        ldrsh   r0, [lr, #( 0*8)]               @ r0 = 'd0'
        ldrsh   r2, [lr, #( 4*8)]               @ r2 = 'd2'
        ldrsh   r4, [lr, #( 8*8)]               @ r4 = 'd4'
        ldrsh   r6, [lr, #(12*8)]               @ r6 = 'd6'

        @ Even part
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     r1, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     r1, r3, r1                      @ r1 = z1
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, r1                  @ r6 = tmp2
        add     r5, r0, r4                      @ r5 = tmp0
        mla     r2, r3, r2, r1                  @ r2 = tmp3
        sub     r3, r0, r4                      @ r3 = tmp1

        add     r0, r2, r5, lsl #13             @ r0 = tmp10
        rsb     r2, r2, r5, lsl #13             @ r2 = tmp13
        add     r4, r6, r3, lsl #13             @ r4 = tmp11
        rsb     r6, r6, r3, lsl #13             @ r6 = tmp12

        ldrsh   r1, [lr, #( 2*8)]               @ r1 = 'd1'
        ldrsh   r3, [lr, #( 6*8)]               @ r3 = 'd3'
        ldrsh   r5, [lr, #(10*8)]               @ r5 = 'd5'
        ldrsh   r7, [lr, #(14*8)]               @ r7 = 'd7'

        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
        orr     r9, r1, r3
        orr     r10, r5, r7
        orrs    r10, r9, r10
        beq     empty_odd_column

        @ Same registers are used for the save and the reload below.
        stmdb   sp!, { r0, r2, r4, r6 }         @ save on the stack tmp10, tmp13, tmp11, tmp12

        @ Odd part
        add     r0, r3, r5                      @ r0 = 'z2'
        add     r2, r1, r7                      @ r2 = 'z1'
        add     r4, r3, r7                      @ r4 = 'z3'
        add     r6, r1, r5                      @ r6 = 'z4'
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6                      @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8                      @ r8 = 'z5'
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2                     @ r2 = 'z1'
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0                      @ r0 = 'z2'
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8                 @ r4 = 'z3'
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8                  @ r6 = 'z4'
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2                 @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0                  @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0                 @ r3 = tmp2 + z2
        add     r7, r7, r4                      @ r7 = tmp0
        mla     r1, r9, r1, r2                  @ r1 = tmp3 + z1
        add     r5, r5, r6                      @ r5 = tmp1
        add     r3, r3, r4                      @ r3 = tmp2
        add     r1, r1, r6                      @ r1 = tmp3

        ldmia   sp!, { r0, r2, r4, r6 }         @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
                                                @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        add     r8, r0, r1
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 0*8)]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        sub     r8, r0, r1
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        add     r8, r4, r3
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 2*8)]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        sub     r8, r4, r3
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        add     r8, r6, r5
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 4*8)]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        sub     r8, r6, r5
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        add     r8, r2, r7
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 6*8)]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        sub     r8, r2, r7
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 8*8)]

        @ End of column loop
        add     lr, lr, #2                      @ advance to the next column
        subs    r12, r12, #1
        bne     column_loop
        b       the_end                         @ all columns done; skip the shortcut path

        @ d1 = d3 = d5 = d7 = 0: the odd part contributes nothing, so each
        @ "+" / "-" output pair collapses to a single rounded value.
empty_odd_column:
        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        add     r0, r0, #(1<<17)
        mov     r0, r0, asr #18
        strh    r0, [lr, #( 0*8)]
        strh    r0, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        add     r4, r4, #(1<<17)
        mov     r4, r4, asr #18
        strh    r4, [lr, #( 2*8)]
        strh    r4, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        add     r6, r6, #(1<<17)
        mov     r6, r6, asr #18
        strh    r6, [lr, #( 4*8)]
        strh    r6, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        add     r2, r2, #(1<<17)
        mov     r2, r2, asr #18
        strh    r2, [lr, #( 6*8)]
        strh    r2, [lr, #( 8*8)]

        @ End of column loop
        add     lr, lr, #2
        subs    r12, r12, #1
        bne     column_loop

the_end:
        @ The end....
        add     sp, sp, #4                      @ drop the saved data pointer
        ldmia   sp!, { r4 - r12, pc }           @ restore callee saved regs and return

        @ Constant table addressed through r11 with the FIX_*_ID offsets.
        @ The alignment directive comes before the label so that any padding
        @ it emits can never end up between the label and the first word.
        .align
const_array:
        .word FIX_0_298631336
        .word FIX_0_541196100
        .word FIX_0_765366865
        .word FIX_1_175875602
        .word FIX_1_501321110
        .word FIX_2_053119869
        .word FIX_3_072711026
        .word FIX_M_0_390180644
        .word FIX_M_0_899976223
        .word FIX_M_1_847759065
        .word FIX_M_1_961570560
        .word FIX_M_2_562915447
        .word FIX_0xFFFF