;*****************************************************************************
;* SIMD-optimized pixel operations
;*****************************************************************************
;* Copyright (c) 2000, 2001 Fabrice Bellard
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;*****************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

INIT_MMX mmx
; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
;
; Widens an 8x8 block of unsigned bytes into 16-bit words.
;   r0 = block      destination; 128 bytes are written (8 rows x 8 words)
;   r1 = pixels     source; 8 bytes are read from each of 8 rows
;   r2 = line_size  byte distance between consecutive source rows
;   r3 = scratch    negative byte offset into block, stepping -128 -> 0
; Two rows are converted per iteration, so the loop runs four times.
; NOTE(review): no emms is issued here; presumably the caller is responsible
; for restoring the x87 state - confirm against the C call sites.
cglobal get_pixels, 3,4
    movsxdifnidn r2, r2d            ; line_size is a C int: widen it for addressing
    add          r0, 128            ; r0 = one past the end of block ...
    mov          r3, -128           ; ... so r0+r3 starts at block[0]
    pxor         m7, m7             ; m7 = 0: supplies the high byte of each word
.loop:
    mova         m0, [r1]           ; row n,   8 pixels
    mova         m2, [r1+r2]        ; row n+1, 8 pixels
    mova         m1, m0             ; copies, because unpack overwrites its
    mova         m3, m2             ; destination and both halves are needed
    punpcklbw    m0, m7             ; row n,   pixels 0-3 -> words
    punpckhbw    m1, m7             ; row n,   pixels 4-7 -> words
    punpcklbw    m2, m7             ; row n+1, pixels 0-3 -> words
    punpckhbw    m3, m7             ; row n+1, pixels 4-7 -> words
    mova [r0+r3+ 0], m0
    mova [r0+r3+ 8], m1
    mova [r0+r3+16], m2
    mova [r0+r3+24], m3
    lea          r1, [r1+r2*2]      ; advance source by two rows (flags untouched)
    add          r3, 32             ; two output rows (2 x 16 bytes) finished
    js .loop                        ; offset still negative -> rows remain
    REP_RET

INIT_XMM sse2
; SSE2 version of get_pixels: same arguments as the MMX version above
; (presumably exported as ff_get_pixels_sse2 - confirm against the C
; prototypes).
;   r0 = block      destination; 128 bytes are written
;   r1 = pixels     source; advanced by four rows half way through
;   r2 = line_size  byte distance between consecutive source rows
;   r3 = scratch    3 * line_size, to address the fourth row of each group
; Fully unrolled: two groups of four rows, one 8-word row per XMM register.
; NOTE(review): the stores use mova, presumably an aligned 16-byte store
; under INIT_XMM, so block would have to be 16-byte aligned - confirm
; against x86inc and the callers.
cglobal get_pixels, 3, 4, 5
    movsxdifnidn r2, r2d            ; line_size is a C int: widen it for addressing
    lea          r3, [r2*3]         ; r3 = 3 * line_size
    pxor         m4, m4             ; m4 = 0: supplies the high byte of each word
    movh         m0, [r1]           ; rows 0-3, 8 pixels each
    movh         m1, [r1+r2]
    movh         m2, [r1+r2*2]
    movh         m3, [r1+r3]
    lea          r1, [r1+r2*4]      ; r1 -> row 4
    punpcklbw    m0, m4             ; 8 bytes -> 8 words
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    mova       [r0], m0
    mova  [r0+0x10], m1
    mova  [r0+0x20], m2
    mova  [r0+0x30], m3
    movh         m0, [r1]           ; rows 4-7, 8 pixels each
    movh         m1, [r1+r2*1]
    movh         m2, [r1+r2*2]
    movh         m3, [r1+r3]
    punpcklbw    m0, m4
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    mova  [r0+0x40], m0
    mova  [r0+0x50], m1
    mova  [r0+0x60], m2
    mova  [r0+0x70], m3
    RET

INIT_MMX mmx
; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
;                         int stride);
;
; Stores the per-pixel difference s1 - s2 of two 8x8 byte blocks as 16-bit
; words.
;   r0 = block   destination; 128 bytes are written (8 rows x 8 words)
;   r1 = s1      minuend rows
;   r2 = s2      subtrahend rows
;   r3 = stride  byte distance between consecutive rows, used for both sources
;   r4 = scratch negative byte offset into block, stepping -128 -> 0
; One row is processed per iteration, so the loop runs eight times.
; NOTE(review): no emms is issued here; presumably the caller is responsible
; for restoring the x87 state - confirm against the C call sites.
cglobal diff_pixels, 4,5
    movsxdifnidn r3, r3d            ; stride is a C int: widen it for addressing
    pxor         m7, m7             ; m7 = 0: supplies the high byte of each word
    add          r0,  128           ; r0 = one past the end of block ...
    mov          r4, -128           ; ... so r0+r4 starts at block[0]
.loop:
    mova         m0, [r1]           ; 8 pixels of s1
    mova         m2, [r2]           ; 8 pixels of s2
    mova         m1, m0             ; copies, because unpack overwrites its
    mova         m3, m2             ; destination and both halves are needed
    punpcklbw    m0, m7             ; s1 pixels 0-3 -> words
    punpckhbw    m1, m7             ; s1 pixels 4-7 -> words
    punpcklbw    m2, m7             ; s2 pixels 0-3 -> words
    punpckhbw    m3, m7             ; s2 pixels 4-7 -> words
    psubw        m0, m2             ; s1 - s2, pixels 0-3
    psubw        m1, m3             ; s1 - s2, pixels 4-7
    mova  [r0+r4+0], m0
    mova  [r0+r4+8], m1
    add          r1, r3             ; next row of s1
    add          r2, r3             ; next row of s2
    add          r4, 16             ; one output row (16 bytes) finished;
    jne .loop                       ; must stay last, its flags drive the branch
    REP_RET

INIT_XMM sse2
; SSE2 version of diff_pixels: same arguments as the MMX version above
; (presumably exported as ff_diff_pixels_sse2 - confirm against the C
; prototypes).
;   r0 = block   destination; 128 bytes are written
;   r1 = s1      minuend rows
;   r2 = s2      subtrahend rows
;   r3 = stride  byte distance between consecutive rows, used for both sources
;   r4 = scratch negative byte offset into block, stepping -128 -> 0
; Two rows are processed per iteration, so the loop runs four times.
; NOTE(review): the stores use mova, presumably an aligned 16-byte store
; under INIT_XMM, so block would have to be 16-byte aligned - confirm
; against x86inc and the callers.
cglobal diff_pixels, 4, 5, 5
    movsxdifnidn r3, r3d            ; stride is a C int: widen it for addressing
    pxor         m4, m4             ; m4 = 0: supplies the high byte of each word
    add          r0,  128           ; r0 = one past the end of block ...
    mov          r4, -128           ; ... so r0+r4 starts at block[0]
.loop:
    movh         m0, [r1]           ; s1 row n,   8 pixels
    movh         m2, [r2]           ; s2 row n,   8 pixels
    movh         m1, [r1+r3]        ; s1 row n+1, 8 pixels
    movh         m3, [r2+r3]        ; s2 row n+1, 8 pixels
    punpcklbw    m0, m4             ; 8 bytes -> 8 words
    punpcklbw    m1, m4
    punpcklbw    m2, m4
    punpcklbw    m3, m4
    psubw        m0, m2             ; row n:   s1 - s2
    psubw        m1, m3             ; row n+1: s1 - s2
    mova [r0+r4+0 ], m0
    mova [r0+r4+16], m1
    lea          r1, [r1+r3*2]      ; advance both sources by two rows
    lea          r2, [r2+r3*2]      ; (lea leaves the flags untouched)
    add          r4, 32             ; two output rows (2 x 16 bytes) finished
    jne .loop                       ; loop until the offset reaches zero
    RET