/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 * Copyright (c) 2006 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "asm.S"

#define W1  22725   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W2  21407   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W3  19266   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W4  16383   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W5  12873   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W6  8867    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W7  4520    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define ROW_SHIFT 11
#define COL_SHIFT 20

#define W13 (W1 | (W3 << 16))
#define W26 (W2 | (W6 << 16))
#define W57 (W5 | (W7 << 16))
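
/*
 * Each packed word above carries two of the 16-bit cosine weights, one
 * per halfword, so a single ARMv5TE smulXY/smlaXY instruction can pick
 * either weight via its B (bottom) / T (top) operand suffixes.  As a
 * rough C sketch (hypothetical helper name, not part of this file),
 * "smulbt rd, rn, rm" computes:
 *
 *     int smulbt(int rn, int rm)
 *     {
 *         return (int16_t)rn * (int16_t)(rm >> 16);
 *     }
 *
 * W4 is 16383 = (1 << 14) - 1 rather than the rounded 16384, which
 * also lets the column pass multiply by it with a shift and subtract,
 * (x << 14) - x, instead of a multiply.
 */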

        .text
        .align
w13:    .long W13
w26:    .long W26
w57:    .long W57
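
/*
 * The words above form a small literal pool; they are fetched with
 * PC-relative loads such as
 *
 *     ldr    ip, [pc, #(w26-.-8)]
 *
 * where the -8 accounts for the two words of ARM-mode pipeline
 * prefetch between the address of the ldr and the value read from pc.
 */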

function idct_row_armv5te
        str    lr, [sp, #-4]!

        ldrd   v1, [a1, #8]
        ldrd   a3, [a1]              /* a3 = row[1:0], a4 = row[3:2] */
        orrs   v1, v1, v2
        cmpeq  v1, a4
        cmpeq  v1, a3, lsr #16
        beq    row_dc_only

        mov    v1, #(1<<(ROW_SHIFT-1))
        mov    ip, #16384
        sub    ip, ip, #1            /* ip = W4 */
        smlabb v1, ip, a3, v1        /* v1 = W4*row[0]+(1<<(RS-1)) */
        ldr    ip, [pc, #(w26-.-8)]  /* ip = W2 | (W6 << 16) */
        smultb a2, ip, a4            /* a2 = W6*row[2] */
        smulbb lr, ip, a4            /* lr = W2*row[2] */
        add    v2, v1, a2
        sub    v3, v1, a2
        sub    v4, v1, lr
        add    v1, v1, lr

        ldr    ip, [pc, #(w13-.-8)]  /* ip = W1 | (W3 << 16) */
        ldr    lr, [pc, #(w57-.-8)]  /* lr = W5 | (W7 << 16) */
        smulbt v5, ip, a3            /* v5 = W1*row[1] */
        smultt v6, lr, a4            /* v6 = W7*row[3] */
        smlatt v5, ip, a4, v5        /* v5 += W3*row[3] */
        smultt a2, ip, a3            /* a2 = W3*row[1] */
        smulbt v7, lr, a3            /* v7 = W5*row[1] */
        sub    v6, v6, a2            /* v6 = W7*row[3] - W3*row[1] */
        smulbt a2, ip, a4            /* a2 = W1*row[3] */
        smultt fp, lr, a3            /* fp = W7*row[1] */
        sub    v7, v7, a2            /* v7 = W5*row[1] - W1*row[3] */
        smulbt a2, lr, a4            /* a2 = W5*row[3] */
        ldrd   a3, [a1, #8]          /* a3=row[5:4] a4=row[7:6] */
        sub    fp, fp, a2            /* fp = W7*row[1] - W5*row[3] */

        orrs   a2, a3, a4
        beq    1f
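
        /* at least one of row[4..7] is nonzero: fold the upper four
           coefficients into the even (v1-v4) and odd (v5-fp)
           accumulators; otherwise the beq above jumps straight to the
           output stage at 1: */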

        smlabt v5, lr, a3, v5
        smlabt v6, ip, a3, v6
        smlatt v5, lr, a4, v5
        smlabt v6, lr, a4, v6
        smlatt v7, lr, a3, v7
        smlatt fp, ip, a3, fp
        smulbt a2, ip, a4
        smlatt v7, ip, a4, v7
        sub    fp, fp, a2

        ldr    ip, [pc, #(w26-.-8)]  /* ip = W2 | (W6 << 16) */
        mov    a2, #16384
        sub    a2, a2, #1            /* a2 =  W4 */
        smulbb a2, a2, a3            /* a2 =  W4*row[4] */
        smultb lr, ip, a4            /* lr =  W6*row[6] */
        add    v1, v1, a2            /* v1 += W4*row[4] */
        add    v1, v1, lr            /* v1 += W6*row[6] */
        add    v4, v4, a2            /* v4 += W4*row[4] */
        sub    v4, v4, lr            /* v4 -= W6*row[6] */
        smulbb lr, ip, a4            /* lr =  W2*row[6] */
        sub    v2, v2, a2            /* v2 -= W4*row[4] */
        sub    v2, v2, lr            /* v2 -= W2*row[6] */
        sub    v3, v3, a2            /* v3 -= W4*row[4] */
        add    v3, v3, lr            /* v3 += W2*row[6] */

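        /* output stage: each result is (even +/- odd) >> ROW_SHIFT and
           two 16-bit results are packed per word.  lsr #11 leaves the
           bits shifted down from above the result halfword in bits
           16-20, which bic #0x1f0000 clears before the second value
           is merged in with add ..., lsl #16. */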
1:      add    a2, v1, v5
        mov    a3, a2, lsr #11
        bic    a3, a3, #0x1f0000
        sub    a2, v2, v6
        mov    a2, a2, lsr #11
        add    a3, a3, a2, lsl #16
        add    a2, v3, v7
        mov    a4, a2, lsr #11
        bic    a4, a4, #0x1f0000
        add    a2, v4, fp
        mov    a2, a2, lsr #11
        add    a4, a4, a2, lsl #16
        strd   a3, [a1]

        sub    a2, v4, fp
        mov    a3, a2, lsr #11
        bic    a3, a3, #0x1f0000
        sub    a2, v3, v7
        mov    a2, a2, lsr #11
        add    a3, a3, a2, lsl #16
        add    a2, v2, v6
        mov    a4, a2, lsr #11
        bic    a4, a4, #0x1f0000
        sub    a2, v1, v5
        mov    a2, a2, lsr #11
        add    a4, a4, a2, lsl #16
        strd   a3, [a1, #8]

        ldr    pc, [sp], #4

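/* fast path: rows 1-7 are all zero, so every output equals
   (W4*row[0]) >> ROW_SHIFT, i.e. row[0] << 3 as in the C version.
   bic #0xe000 clears the low halfword's top three bits so the lsl #3
   cannot spill them into the halfword above. */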
row_dc_only:
        orr    a3, a3, a3, lsl #16
        bic    a3, a3, #0xe000
        mov    a3, a3, lsl #3
        mov    a4, a3
        strd   a3, [a1]
        strd   a3, [a1, #8]

        ldr    pc, [sp], #4
        .endfunc

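/*
 * idct_col transforms two adjacent columns at once: each 32-bit load
 * from the block picks up a pair of horizontally neighbouring 16-bit
 * coefficients, which travel side by side through the arithmetic.  On
 * exit the even-part accumulators for both columns have been pushed
 * on the stack and v1-fp hold the odd-part sums; the callers pop and
 * combine them.
 */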
        .macro idct_col
        ldr    a4, [a1]              /* a4 = col[1:0] */
        mov    ip, #16384
        sub    ip, ip, #1            /* ip = W4 */
#if 0
        mov    v1, #(1<<(COL_SHIFT-1))
        smlabt v2, ip, a4, v1        /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
        smlabb v1, ip, a4, v1        /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
        ldr    a4, [a1, #(16*4)]
#else
        mov    v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
        add    v2, v1, a4, asr #16
        rsb    v2, v2, v2, lsl #14
        mov    a4, a4, lsl #16
        add    v1, v1, a4, asr #16
        ldr    a4, [a1, #(16*4)]
        rsb    v1, v1, v1, lsl #14
#endif
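
        /* the #else branch avoids two multiplies by exploiting
           W4 = 16383 = (1 << 14) - 1:
               rsb  vN, vN, vN, lsl #14    @ vN = (vN << 14) - vN = vN*W4
           The lsl #16 / asr #16 pair sign-extends col[0] out of the
           packed word, and adding (1 << (COL_SHIFT-1))/W4 before the
           multiply reproduces the bias-inside-the-multiply rounding
           of the C simple_idct. */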

        smulbb lr, ip, a4
        smulbt a3, ip, a4
        sub    v3, v1, lr
        sub    v5, v1, lr
        add    v7, v1, lr
        add    v1, v1, lr
        sub    v4, v2, a3
        sub    v6, v2, a3
        add    fp, v2, a3
        ldr    ip, [pc, #(w26-.-8)]
        ldr    a4, [a1, #(16*2)]
        add    v2, v2, a3

        smulbb lr, ip, a4
        smultb a3, ip, a4
        add    v1, v1, lr
        sub    v7, v7, lr
        add    v3, v3, a3
        sub    v5, v5, a3
        smulbt lr, ip, a4
        smultt a3, ip, a4
        add    v2, v2, lr
        sub    fp, fp, lr
        add    v4, v4, a3
        ldr    a4, [a1, #(16*6)]
        sub    v6, v6, a3

        smultb lr, ip, a4
        smulbb a3, ip, a4
        add    v1, v1, lr
        sub    v7, v7, lr
        sub    v3, v3, a3
        add    v5, v5, a3
        smultt lr, ip, a4
        smulbt a3, ip, a4
        add    v2, v2, lr
        sub    fp, fp, lr
        sub    v4, v4, a3
        add    v6, v6, a3

        stmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp}

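        /* the even half for both columns is parked on the stack; the
           same eight registers are then reused for the odd half,
           accumulated from rows 1, 3, 5 and 7 of the two columns */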
        ldr    ip, [pc, #(w13-.-8)]
        ldr    a4, [a1, #(16*1)]
        ldr    lr, [pc, #(w57-.-8)]
        smulbb v1, ip, a4
        smultb v3, ip, a4
        smulbb v5, lr, a4
        smultb v7, lr, a4
        smulbt v2, ip, a4
        smultt v4, ip, a4
        smulbt v6, lr, a4
        smultt fp, lr, a4
        rsb    v4, v4, #0
        ldr    a4, [a1, #(16*3)]
        rsb    v3, v3, #0

        smlatb v1, ip, a4, v1
        smlatb v3, lr, a4, v3
        smulbb a3, ip, a4
        smulbb a2, lr, a4
        sub    v5, v5, a3
        sub    v7, v7, a2
        smlatt v2, ip, a4, v2
        smlatt v4, lr, a4, v4
        smulbt a3, ip, a4
        smulbt a2, lr, a4
        sub    v6, v6, a3
        ldr    a4, [a1, #(16*5)]
        sub    fp, fp, a2

        smlabb v1, lr, a4, v1
        smlabb v3, ip, a4, v3
        smlatb v5, lr, a4, v5
        smlatb v7, ip, a4, v7
        smlabt v2, lr, a4, v2
        smlabt v4, ip, a4, v4
        smlatt v6, lr, a4, v6
        ldr    a3, [a1, #(16*7)]
        smlatt fp, ip, a4, fp

        smlatb v1, lr, a3, v1
        smlabb v3, lr, a3, v3
        smlatb v5, ip, a3, v5
        smulbb a4, ip, a3
        smlatt v2, lr, a3, v2
        sub    v7, v7, a4
        smlabt v4, lr, a3, v4
        smulbt a4, ip, a3
        smlatt v6, ip, a3, v6
        sub    fp, fp, a4
        .endm

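/* in-place variant: pops the even half, combines it with the odd half
   and stores the results, shifted down by COL_SHIFT, back into the
   block.  lsr #20 leaves only 12 significant bits, so the orrmi
   restores the sign bits of a negative result within its packed
   halfword. */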
function idct_col_armv5te
        str    lr, [sp, #-4]!

        idct_col

        ldmfd  sp!, {a3, a4}
        adds   a2, a3, v1
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        add    ip, a4, v2
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1]
        subs   a3, a3, v1
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        sub    a4, a4, v2
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}
        str    a2, [a1, #(16*7)]

        subs   a2, a3, v3
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        sub    ip, a4, v4
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*1)]
        adds   a3, a3, v3
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        add    a4, a4, v4
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}
        str    a2, [a1, #(16*6)]

        adds   a2, a3, v5
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        add    ip, a4, v6
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*2)]
        subs   a3, a3, v5
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        sub    a4, a4, v6
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        ldmfd  sp!, {a3, a4}
        str    a2, [a1, #(16*5)]

        adds   a2, a3, v7
        mov    a2, a2, lsr #20
        orrmi  a2, a2, #0xf000
        add    ip, a4, fp
        mov    ip, ip, asr #20
        orr    a2, a2, ip, lsl #16
        str    a2, [a1, #(16*3)]
        subs   a3, a3, v7
        mov    a2, a3, lsr #20
        orrmi  a2, a2, #0xf000
        sub    a4, a4, fp
        mov    a4, a4, asr #20
        orr    a2, a2, a4, lsl #16
        str    a2, [a1, #(16*4)]

        ldr    pc, [sp], #4
        .endfunc

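/* put variant: each result is clamped to 0..255 and pixel pairs are
   stored with strh.  The destination pointer and line size were saved
   on the stack by simple_idct_put_armv5te below;
   rsb v2, lr, lr, lsl #3 forms dest + 7*line_size so the eight rows
   can be written from both ends toward the middle. */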
function idct_col_put_armv5te
        str    lr, [sp, #-4]!

        idct_col

        ldmfd  sp!, {a3, a4}
        ldr    lr, [sp, #32]
        add    a2, a3, v1
        movs   a2, a2, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    ip, a4, v2
        movs   ip, ip, asr #20
        movmi  ip, #0
        cmp    ip, #255
        movgt  ip, #255
        orr    a2, a2, ip, lsl #8
        sub    a3, a3, v1
        movs   a3, a3, asr #20
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        sub    a4, a4, v2
        movs   a4, a4, asr #20
        movmi  a4, #0
        cmp    a4, #255
        ldr    v1, [sp, #28]
        movgt  a4, #255
        strh   a2, [v1]
        add    a2, v1, #2
        str    a2, [sp, #28]
        orr    a2, a3, a4, lsl #8
        rsb    v2, lr, lr, lsl #3
        ldmfd  sp!, {a3, a4}
        strh   a2, [v2, v1]!

        sub    a2, a3, v3
        movs   a2, a2, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        sub    ip, a4, v4
        movs   ip, ip, asr #20
        movmi  ip, #0
        cmp    ip, #255
        movgt  ip, #255
        orr    a2, a2, ip, lsl #8
        strh   a2, [v1, lr]!
        add    a3, a3, v3
        movs   a2, a3, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    a4, a4, v4
        movs   a4, a4, asr #20
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a2, a4, lsl #8
        ldmfd  sp!, {a3, a4}
        strh   a2, [v2, -lr]!

        add    a2, a3, v5
        movs   a2, a2, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    ip, a4, v6
        movs   ip, ip, asr #20
        movmi  ip, #0
        cmp    ip, #255
        movgt  ip, #255
        orr    a2, a2, ip, lsl #8
        strh   a2, [v1, lr]!
        sub    a3, a3, v5
        movs   a2, a3, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        sub    a4, a4, v6
        movs   a4, a4, asr #20
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a2, a4, lsl #8
        ldmfd  sp!, {a3, a4}
        strh   a2, [v2, -lr]!

        add    a2, a3, v7
        movs   a2, a2, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    ip, a4, fp
        movs   ip, ip, asr #20
        movmi  ip, #0
        cmp    ip, #255
        movgt  ip, #255
        orr    a2, a2, ip, lsl #8
        strh   a2, [v1, lr]
        sub    a3, a3, v7
        movs   a2, a3, asr #20
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        sub    a4, a4, fp
        movs   a4, a4, asr #20
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a2, a4, lsl #8
        strh   a2, [v2, -lr]

        ldr    pc, [sp], #4
        .endfunc

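/* add variant: like the put variant, but each result is added to the
   destination pixel (loaded pairwise with ldrh) before clamping to
   0..255, as needed for motion-compensated residual blocks. */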
function idct_col_add_armv5te
        str    lr, [sp, #-4]!

        idct_col

        ldr    lr, [sp, #36]

        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr]
        add    a2, a3, v1
        mov    a2, a2, asr #20
        sub    a3, a3, v1
        and    v1, ip, #255
        adds   a2, a2, v1
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    v1, a4, v2
        mov    v1, v1, asr #20
        adds   v1, v1, ip, lsr #8
        movmi  v1, #0
        cmp    v1, #255
        movgt  v1, #255
        orr    a2, a2, v1, lsl #8
        ldr    v1, [sp, #32]
        sub    a4, a4, v2
        rsb    v2, v1, v1, lsl #3
        ldrh   ip, [v2, lr]!
        strh   a2, [lr]
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        add    a2, lr, #2
        str    a2, [sp, #28]
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]

        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr, v1]!
        sub    a2, a3, v3
        mov    a2, a2, asr #20
        add    a3, a3, v3
        and    v3, ip, #255
        adds   a2, a2, v3
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        sub    v3, a4, v4
        mov    v3, v3, asr #20
        adds   v3, v3, ip, lsr #8
        movmi  v3, #0
        cmp    v3, #255
        movgt  v3, #255
        orr    a2, a2, v3, lsl #8
        add    a4, a4, v4
        ldrh   ip, [v2, -v1]!
        strh   a2, [lr]
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]

        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr, v1]!
        add    a2, a3, v5
        mov    a2, a2, asr #20
        sub    a3, a3, v5
        and    v3, ip, #255
        adds   a2, a2, v3
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    v3, a4, v6
        mov    v3, v3, asr #20
        adds   v3, v3, ip, lsr #8
        movmi  v3, #0
        cmp    v3, #255
        movgt  v3, #255
        orr    a2, a2, v3, lsl #8
        sub    a4, a4, v6
        ldrh   ip, [v2, -v1]!
        strh   a2, [lr]
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]

        ldmfd  sp!, {a3, a4}
        ldrh   ip, [lr, v1]!
        add    a2, a3, v7
        mov    a2, a2, asr #20
        sub    a3, a3, v7
        and    v3, ip, #255
        adds   a2, a2, v3
        movmi  a2, #0
        cmp    a2, #255
        movgt  a2, #255
        add    v3, a4, fp
        mov    v3, v3, asr #20
        adds   v3, v3, ip, lsr #8
        movmi  v3, #0
        cmp    v3, #255
        movgt  v3, #255
        orr    a2, a2, v3, lsl #8
        sub    a4, a4, fp
        ldrh   ip, [v2, -v1]!
        strh   a2, [lr]
        mov    a3, a3, asr #20
        and    a2, ip, #255
        adds   a3, a3, a2
        movmi  a3, #0
        cmp    a3, #255
        movgt  a3, #255
        mov    a4, a4, asr #20
        adds   a4, a4, ip, lsr #8
        movmi  a4, #0
        cmp    a4, #255
        movgt  a4, #255
        orr    a2, a3, a4, lsl #8
        strh   a2, [v2]

        ldr    pc, [sp], #4
        .endfunc

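/* full 2-D IDCT: eight row transforms across the 8x8 coefficient
   block, then four calls to idct_col_armv5te, each of which handles
   two columns. */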
function simple_idct_armv5te, export=1
        stmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}

        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te

        sub    a1, a1, #(16*7)

        bl     idct_col_armv5te
        add    a1, a1, #4
        bl     idct_col_armv5te
        add    a1, a1, #4
        bl     idct_col_armv5te
        add    a1, a1, #4
        bl     idct_col_armv5te

        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
        .endfunc

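/* the put/add entry points save dest (a1) and line_size (a2) on the
   stack, where the column routines expect to find them, then run the
   same row/column schedule on the block passed in a3; the
   add sp, sp, #8 at the end discards those two saved words. */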
function simple_idct_add_armv5te, export=1
        stmfd  sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}

        mov    a1, a3

        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te

        sub    a1, a1, #(16*7)

        bl     idct_col_add_armv5te
        add    a1, a1, #4
        bl     idct_col_add_armv5te
        add    a1, a1, #4
        bl     idct_col_add_armv5te
        add    a1, a1, #4
        bl     idct_col_add_armv5te

        add    sp, sp, #8
        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
        .endfunc

function simple_idct_put_armv5te, export=1
        stmfd  sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}

        mov    a1, a3

        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te
        add    a1, a1, #16
        bl     idct_row_armv5te

        sub    a1, a1, #(16*7)

        bl     idct_col_put_armv5te
        add    a1, a1, #4
        bl     idct_col_put_armv5te
        add    a1, a1, #4
        bl     idct_col_put_armv5te
        add    a1, a1, #4
        bl     idct_col_put_armv5te

        add    sp, sp, #8
        ldmfd  sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
        .endfunc