1/* 2 * This file is part of FFmpeg. 3 * 4 * FFmpeg is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * FFmpeg is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Lesser General Public License for more details. 13 * 14 * You should have received a copy of the GNU Lesser General Public 15 * License along with FFmpeg; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19#include "libavutil/attributes.h" 20#include "libavutil/avassert.h" 21#include "libavutil/cpu.h" 22#include "libavutil/x86/cpu.h" 23#include "libavcodec/avcodec.h" 24#include "libavcodec/mpegvideoencdsp.h" 25 26int ff_pix_sum16_mmx(uint8_t *pix, int line_size); 27int ff_pix_sum16_sse2(uint8_t *pix, int line_size); 28int ff_pix_sum16_xop(uint8_t *pix, int line_size); 29int ff_pix_norm1_mmx(uint8_t *pix, int line_size); 30int ff_pix_norm1_sse2(uint8_t *pix, int line_size); 31 32#if HAVE_INLINE_ASM 33 34#define PHADDD(a, t) \ 35 "movq " #a ", " #t " \n\t" \ 36 "psrlq $32, " #a " \n\t" \ 37 "paddd " #t ", " #a " \n\t" 38 39/* 40 * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31] 41 * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31] 42 * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30] 43 */ 44#define PMULHRW(x, y, s, o) \ 45 "pmulhw " #s ", " #x " \n\t" \ 46 "pmulhw " #s ", " #y " \n\t" \ 47 "paddw " #o ", " #x " \n\t" \ 48 "paddw " #o ", " #y " \n\t" \ 49 "psraw $1, " #x " \n\t" \ 50 "psraw $1, " #y " \n\t" 51#define DEF(x) x ## _mmx 52#define SET_RND MOVQ_WONE 53#define SCALE_OFFSET 1 54 55#include "mpegvideoenc_qns_template.c" 56 57#undef DEF 58#undef SET_RND 59#undef SCALE_OFFSET 60#undef PMULHRW 61 62#define DEF(x) x ## _3dnow 63#define SET_RND(x) 64#define SCALE_OFFSET 0 65#define PMULHRW(x, y, s, o) \ 66 "pmulhrw " #s ", " #x " \n\t" \ 67 "pmulhrw " #s ", " #y " \n\t" 68 69#include "mpegvideoenc_qns_template.c" 70 71#undef DEF 72#undef SET_RND 73#undef SCALE_OFFSET 74#undef PMULHRW 75 76#if HAVE_SSSE3_INLINE 77#undef PHADDD 78#define DEF(x) x ## _ssse3 79#define SET_RND(x) 80#define SCALE_OFFSET -1 81 82#define PHADDD(a, t) \ 83 "pshufw $0x0E, " #a ", " #t " \n\t" \ 84 /* faster than phaddd on core2 */ \ 85 "paddd " #t ", " #a " \n\t" 86 87#define PMULHRW(x, y, s, o) \ 88 "pmulhrsw " #s ", " #x " \n\t" \ 89 "pmulhrsw " #s ", " #y " \n\t" 90 91#include "mpegvideoenc_qns_template.c" 92 93#undef DEF 94#undef SET_RND 95#undef SCALE_OFFSET 96#undef PMULHRW 97#undef PHADDD 98#endif /* HAVE_SSSE3_INLINE */ 99 100/* Draw the edges of width 'w' of an image of size width, height 101 * this MMX version can only handle w == 8 || w == 16. */ 102static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, 103 int w, int h, int sides) 104{ 105 uint8_t *ptr, *last_line; 106 int i; 107 108 last_line = buf + (height - 1) * wrap; 109 /* left and right */ 110 ptr = buf; 111 if (w == 8) { 112 __asm__ volatile ( 113 "1: \n\t" 114 "movd (%0), %%mm0 \n\t" 115 "punpcklbw %%mm0, %%mm0 \n\t" 116 "punpcklwd %%mm0, %%mm0 \n\t" 117 "punpckldq %%mm0, %%mm0 \n\t" 118 "movq %%mm0, -8(%0) \n\t" 119 "movq -8(%0, %2), %%mm1 \n\t" 120 "punpckhbw %%mm1, %%mm1 \n\t" 121 "punpckhwd %%mm1, %%mm1 \n\t" 122 "punpckhdq %%mm1, %%mm1 \n\t" 123 "movq %%mm1, (%0, %2) \n\t" 124 "add %1, %0 \n\t" 125 "cmp %3, %0 \n\t" 126 "jb 1b \n\t" 127 : "+r" (ptr) 128 : "r" ((x86_reg) wrap), "r" ((x86_reg) width), 129 "r" (ptr + wrap * height)); 130 } else if (w == 16) { 131 __asm__ volatile ( 132 "1: \n\t" 133 "movd (%0), %%mm0 \n\t" 134 "punpcklbw %%mm0, %%mm0 \n\t" 135 "punpcklwd %%mm0, %%mm0 \n\t" 136 "punpckldq %%mm0, %%mm0 \n\t" 137 "movq %%mm0, -8(%0) \n\t" 138 "movq %%mm0, -16(%0) \n\t" 139 "movq -8(%0, %2), %%mm1 \n\t" 140 "punpckhbw %%mm1, %%mm1 \n\t" 141 "punpckhwd %%mm1, %%mm1 \n\t" 142 "punpckhdq %%mm1, %%mm1 \n\t" 143 "movq %%mm1, (%0, %2) \n\t" 144 "movq %%mm1, 8(%0, %2) \n\t" 145 "add %1, %0 \n\t" 146 "cmp %3, %0 \n\t" 147 "jb 1b \n\t" 148 : "+r"(ptr) 149 : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height) 150 ); 151 } else { 152 av_assert1(w == 4); 153 __asm__ volatile ( 154 "1: \n\t" 155 "movd (%0), %%mm0 \n\t" 156 "punpcklbw %%mm0, %%mm0 \n\t" 157 "punpcklwd %%mm0, %%mm0 \n\t" 158 "movd %%mm0, -4(%0) \n\t" 159 "movd -4(%0, %2), %%mm1 \n\t" 160 "punpcklbw %%mm1, %%mm1 \n\t" 161 "punpckhwd %%mm1, %%mm1 \n\t" 162 "punpckhdq %%mm1, %%mm1 \n\t" 163 "movd %%mm1, (%0, %2) \n\t" 164 "add %1, %0 \n\t" 165 "cmp %3, %0 \n\t" 166 "jb 1b \n\t" 167 : "+r" (ptr) 168 : "r" ((x86_reg) wrap), "r" ((x86_reg) width), 169 "r" (ptr + wrap * height)); 170 } 171 172 /* top and bottom (and hopefully also the corners) */ 173 if (sides & EDGE_TOP) { 174 for (i = 0; i < h; i += 4) { 175 ptr = buf - (i + 1) * wrap - w; 176 __asm__ volatile ( 177 "1: \n\t" 178 "movq (%1, %0), %%mm0 \n\t" 179 "movq %%mm0, (%0) \n\t" 180 "movq %%mm0, (%0, %2) \n\t" 181 "movq %%mm0, (%0, %2, 2) \n\t" 182 "movq %%mm0, (%0, %3) \n\t" 183 "add $8, %0 \n\t" 184 "cmp %4, %0 \n\t" 185 "jb 1b \n\t" 186 : "+r" (ptr) 187 : "r" ((x86_reg) buf - (x86_reg) ptr - w), 188 "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3), 189 "r" (ptr + width + 2 * w)); 190 } 191 } 192 193 if (sides & EDGE_BOTTOM) { 194 for (i = 0; i < h; i += 4) { 195 ptr = last_line + (i + 1) * wrap - w; 196 __asm__ volatile ( 197 "1: \n\t" 198 "movq (%1, %0), %%mm0 \n\t" 199 "movq %%mm0, (%0) \n\t" 200 "movq %%mm0, (%0, %2) \n\t" 201 "movq %%mm0, (%0, %2, 2) \n\t" 202 "movq %%mm0, (%0, %3) \n\t" 203 "add $8, %0 \n\t" 204 "cmp %4, %0 \n\t" 205 "jb 1b \n\t" 206 : "+r" (ptr) 207 : "r" ((x86_reg) last_line - (x86_reg) ptr - w), 208 "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3), 209 "r" (ptr + width + 2 * w)); 210 } 211 } 212} 213 214#endif /* HAVE_INLINE_ASM */ 215 216av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, 217 AVCodecContext *avctx) 218{ 219 int cpu_flags = av_get_cpu_flags(); 220 221 if (EXTERNAL_MMX(cpu_flags)) { 222 c->pix_sum = ff_pix_sum16_mmx; 223 c->pix_norm1 = ff_pix_norm1_mmx; 224 } 225 226 if (EXTERNAL_SSE2(cpu_flags)) { 227 c->pix_sum = ff_pix_sum16_sse2; 228 c->pix_norm1 = ff_pix_norm1_sse2; 229 } 230 231 if (EXTERNAL_XOP(cpu_flags)) { 232 c->pix_sum = ff_pix_sum16_xop; 233 } 234 235#if HAVE_INLINE_ASM 236 237 if (INLINE_MMX(cpu_flags)) { 238 if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { 239 c->try_8x8basis = try_8x8basis_mmx; 240 } 241 c->add_8x8basis = add_8x8basis_mmx; 242 243 if (avctx->bits_per_raw_sample <= 8) { 244 c->draw_edges = draw_edges_mmx; 245 } 246 } 247 248 if (INLINE_AMD3DNOW(cpu_flags)) { 249 if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { 250 c->try_8x8basis = try_8x8basis_3dnow; 251 } 252 c->add_8x8basis = add_8x8basis_3dnow; 253 } 254 255#if HAVE_SSSE3_INLINE 256 if (INLINE_SSSE3(cpu_flags)) { 257 if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { 258 c->try_8x8basis = try_8x8basis_ssse3; 259 } 260 c->add_8x8basis = add_8x8basis_ssse3; 261 } 262#endif /* HAVE_SSSE3_INLINE */ 263 264#endif /* HAVE_INLINE_ASM */ 265} 266