;*****************************************************************************
;* SSE2-optimized H.264 iDCT
;*****************************************************************************
;* Copyright (C) 2003-2008 x264 project
;*
;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
;*          Loren Merritt <lorenm@u.washington.edu>
;*          Holger Lubitz <hal@duncan.ol.sub.de>
;*          Min Chen <chenm001.163.com>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
;*****************************************************************************

; Syntax: NASM/Yasm, Intel operand order, using the x264 abstraction layer
; (cglobal, rN/mN register aliases, SWAP, GLOBAL, ...) from x86inc.asm and the
; SUMSUB_*/TRANSPOSE*/STORE_DIFF helper macros from x86util.asm.
%include "x86inc.asm"
%include "x86util.asm"

SECTION_RODATA
; Eight 16-bit words of 32: bias added to the first vector before the second
; transform pass (presumably the rounding term for a >>6 performed inside
; STORE_DIFF -- that macro is defined in x86util.asm, confirm there).
pw_32: times 8 dw 32

SECTION .text

;-----------------------------------------------------------------------------
; IDCT4_1D in0, in1, in2, in3, tmp0, tmp1
;
; One 1-D pass of the 4-point transform over the four vectors m%1..m%4,
; built from the x86util.asm butterfly macros.  m%5 and m%6 are handed to
; those macros as scratch registers.  The trailing SWAP renames registers so
; that the caller keeps addressing the four results as m%1..m%4; m%6 does not
; take part in the SWAP and is not read again by this macro.
;-----------------------------------------------------------------------------
%macro IDCT4_1D 6
    SUMSUB_BA   m%3, m%1
    SUMSUBD2_AB m%2, m%4, m%6, m%5
    SUMSUB_BADC m%2, m%3, m%5, m%1
    SWAP        %1, %2, %5, %4, %3
%endmacro

;-----------------------------------------------------------------------------
; x264_add8x4_idct_sse2( r0, r1, r2 )
;
; In:    r0 = destination address; four rows are updated, at r0, r0+r2,
;             r0+2*r2 and r0+3*r2
;        r1 = 64 bytes of 16-bit coefficients, read as two consecutive
;             32-byte groups of four 8-byte rows each
;        r2 = distance in bytes between destination rows
;        (presumably the C prototype is
;         void f(uint8_t *dst, int16_t *coef, int stride) -- TODO confirm
;         against the caller's declaration)
; Out:   nothing in registers; the destination rows are modified by STORE_DIFF
; Clobb: r0, m0-m5.  Only xmm0-xmm5 are touched, all of which are volatile in
;        both the System V and the Microsoft x64 calling conventions, so no
;        XMM save/restore is needed.
;-----------------------------------------------------------------------------
INIT_XMM
cglobal x264_add8x4_idct_sse2, 3,3
    ; Row i of the first 32-byte group goes into the low half of m<i>,
    ; row i of the second group into the high half, so both groups are
    ; transformed side by side in each 128-bit register.
    movq        m0, [r1+ 0]
    movq        m1, [r1+ 8]
    movq        m2, [r1+16]
    movq        m3, [r1+24]
    movhps      m0, [r1+32]
    movhps      m1, [r1+40]
    movhps      m2, [r1+48]
    movhps      m3, [r1+56]

    IDCT4_1D    0,1,2,3,4,5             ; first 1-D pass
    TRANSPOSE2x4x4W 0,1,2,3,4           ; m4 is scratch
    paddw       m0, [pw_32 GLOBAL]      ; add bias of 32 before the second pass
    IDCT4_1D    0,1,2,3,4,5             ; second 1-D pass

    ; m4 and m5 are dead after the last IDCT4_1D.  The zero constant lives in
    ; m5 rather than m7: xmm7 is callee-saved on Win64 and this function does
    ; not declare or save any XMM registers.
    pxor        m5, m5
    STORE_DIFF  m0, m4, m5, [r0]
    STORE_DIFF  m1, m4, m5, [r0+r2]
    lea         r0, [r0+r2*2]           ; advance two rows
    STORE_DIFF  m2, m4, m5, [r0]
    STORE_DIFF  m3, m4, m5, [r0+r2]
    RET