;******************************************************************************
;* SIMD-optimized fullpel functions
;* Copyright (c) 2008 Loren Merritt
;* Copyright (c) 2003-2013 Michael Niedermayer
;* Copyright (c) 2013 Daniel Kang
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

;------------------------------------------------------------------------------
; PAVGB_MMX mem, srcreg, tmpreg, maskreg
;
; Rounded byte-wise average for CPUs without the MMXEXT pavgb instruction,
; using the identity:  avg(a, b) = (a | b) - (((a ^ b) & 0xFE) >> 1)
; In:   %1 = memory operand (a), %2 = register holding b,
;       %3 = scratch register, %4 = register preloaded with 0xFE in every byte
;       (the mask clears each byte's LSB so the single psrlq shift cannot
;       leak bits between adjacent bytes)
; Out:  result left in %2 (SWAP exchanges the register assignments of %2/%3,
;       so the caller keeps referring to its original register name)
;------------------------------------------------------------------------------
%macro PAVGB_MMX 4
    LOAD   %3, %1        ; %3 = a
    por    %3, %2        ; %3 = a | b
    pxor   %2, %1        ; %2 = a ^ b
    pand   %2, %4        ; %2 = (a ^ b) & 0xFE  (drop per-byte LSB before shift)
    psrlq  %2, 1         ; %2 = ((a ^ b) & 0xFE) >> 1  (no cross-byte leakage)
    psubb  %3, %2        ; %3 = (a | b) - above = rounded average
    SWAP   %2, %3        ; hand the result back under the caller's register name
%endmacro

;------------------------------------------------------------------------------
; void ff_put/avg_pixels(uint8_t *block, const uint8_t *pixels,
;                        ptrdiff_t line_size, int h)
;
; OP_PIXELS op, width
;   %1 = put/avg, %2 = block width in bytes (4, 8 or 16)
; Copies (put) or averages-into (avg) a %2-wide column of pixels, processing
; four rows per loop iteration; h (r3d) is assumed to be a multiple of 4.
;------------------------------------------------------------------------------
%macro OP_PIXELS 2
%if %2 == mmsize/2
; Width is half a SIMD register: use half-register loads/stores and force a
; single %rep iteration (LEN/mmsize == 1).
%define LOAD movh
%define SAVE movh
%define LEN  mmsize
%else
; Full-width case: unaligned loads from the source, aligned stores to the
; destination block (SAVE = mova requires r0 to be mmsize-aligned).
%define LOAD movu
%define SAVE mova
%define LEN  %2
%endif
cglobal %1_pixels%2, 4,5,4
    movsxdifnidn r2, r2d              ; widen line_size on ABIs passing it as 32-bit
    lea          r4, [r2*3]           ; r4 = 3*line_size, addresses the 4th row
%ifidn %1, avg
%if notcpuflag(mmxext)
    ; Build the 0xFE-per-byte mask needed by PAVGB_MMX:
    pcmpeqd      m6, m6               ; m6 = all ones (0xFF bytes)
    paddb        m6, m6               ; 0xFF + 0xFF = 0xFE in every byte
%endif
%endif
.loop:
%assign %%i 0
%rep LEN/mmsize                       ; width 16 with MMX needs two 8-byte passes
    LOAD         m0, [r1 + %%i]       ; four consecutive source rows
    LOAD         m1, [r1+r2 + %%i]
    LOAD         m2, [r1+r2*2 + %%i]
    LOAD         m3, [r1+r4 + %%i]
%ifidn %1, avg
%if notcpuflag(mmxext)
    ; Software average; m4/m5 alternate as scratch, m6 holds the 0xFE mask.
    PAVGB_MMX    [r0 + %%i], m0, m4, m6
    PAVGB_MMX    [r0+r2 + %%i], m1, m5, m6
    PAVGB_MMX    [r0+r2*2 + %%i], m2, m4, m6
    PAVGB_MMX    [r0+r4 + %%i], m3, m5, m6
%else
    pavgb        m0, [r0 + %%i]       ; hardware rounded average with dst
    pavgb        m1, [r0+r2 + %%i]
    pavgb        m2, [r0+r2*2 + %%i]
    pavgb        m3, [r0+r4 + %%i]
%endif
%endif
    SAVE         [r0 + %%i], m0
    SAVE         [r0+r2 + %%i], m1
    SAVE         [r0+r2*2 + %%i], m2
    SAVE         [r0+r4 + %%i], m3
%assign %%i %%i+mmsize
%endrep
    sub          r3d, 4               ; h -= 4; sets ZF consumed by jne below
    lea          r1, [r1+r2*4]        ; lea preserves flags: advance src 4 rows
    lea          r0, [r0+r2*4]        ; ... and dst 4 rows
    jne          .loop                ; loop until h reaches 0
    RET
%endmacro

INIT_MMX mmx
OP_PIXELS put, 4
OP_PIXELS avg, 4
OP_PIXELS put, 8
OP_PIXELS avg, 8
OP_PIXELS put, 16
OP_PIXELS avg, 16

; MMXEXT adds pavgb, replacing the PAVGB_MMX fallback; only avg benefits.
INIT_MMX mmxext
OP_PIXELS avg, 4
OP_PIXELS avg, 8
OP_PIXELS avg, 16

; SSE2 handles the full 16-byte width in one register.
INIT_XMM sse2
OP_PIXELS put, 16
OP_PIXELS avg, 16