1/* 2 * Copyright (C) 2010 David Conrad 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#include "libavutil/x86/cpu.h" 22#include "dsputil_x86.h" 23#include "diracdsp_mmx.h" 24#include "fpel.h" 25 26void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); 27void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); 28void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); 29void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); 30 31#define HPEL_FILTER(MMSIZE, EXT) \ 32 void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int); \ 33 void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int); \ 34 \ 35 static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, \ 36 const uint8_t *src, int stride, int width, int height) \ 37 { \ 38 while( height-- ) \ 39 { \ 40 ff_dirac_hpel_filter_v_ ## EXT(dstv-MMSIZE, src-MMSIZE, stride, width+MMSIZE+5); \ 41 ff_dirac_hpel_filter_h_ ## EXT(dsth, src, width); \ 42 ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width); \ 43 \ 44 dsth += stride; \ 45 dstv += stride; \ 46 dstc += stride; \ 47 src += stride; \ 48 } \ 49 } 50 51#if !ARCH_X86_64 52HPEL_FILTER(8, mmx) 53#endif 54HPEL_FILTER(16, sse2) 55 56#define PIXFUNC(PFX, IDX, EXT) \ 57 /*MMXDISABLEDc->PFX ## _dirac_pixels_tab[0][IDX] = ff_ ## PFX ## _dirac_pixels8_ ## EXT;*/ \ 58 c->PFX ## _dirac_pixels_tab[1][IDX] = ff_ ## PFX ## _dirac_pixels16_ ## EXT; \ 59 c->PFX ## _dirac_pixels_tab[2][IDX] = ff_ ## PFX ## _dirac_pixels32_ ## EXT 60 61#define DIRAC_PIXOP(OPNAME2, OPNAME, EXT)\ 62void ff_ ## OPNAME2 ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 63{\ 64 if (h&3)\ 65 ff_ ## OPNAME2 ## _dirac_pixels8_c(dst, src, stride, h);\ 66 else\ 67 OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h);\ 68}\ 69void ff_ ## OPNAME2 ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 70{\ 71 if (h&3)\ 72 ff_ ## OPNAME2 ## _dirac_pixels16_c(dst, src, stride, h);\ 73 else\ 74 OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\ 75}\ 76void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\ 77{\ 78 if (h&3) {\ 79 ff_ ## OPNAME2 ## _dirac_pixels32_c(dst, src, stride, h);\ 80 } else {\ 81 OPNAME ## _pixels16_ ## EXT(dst , src[0] , stride, h);\ 82 OPNAME ## _pixels16_ ## EXT(dst+16, src[0]+16, stride, h);\ 83 }\ 84} 85 86DIRAC_PIXOP(put, ff_put, mmx) 87DIRAC_PIXOP(avg, ff_avg, mmx) 88DIRAC_PIXOP(avg, ff_avg, mmxext) 89 90void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h) 91{ 92 if (h&3) 93 ff_put_dirac_pixels16_c(dst, src, stride, h); 94 else 95 ff_put_pixels16_sse2(dst, src[0], stride, h); 96} 97void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h) 98{ 99 if (h&3) 100 ff_avg_dirac_pixels16_c(dst, src, stride, h); 101 else 102 ff_avg_pixels16_sse2(dst, src[0], stride, h); 103} 104void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h) 105{ 106 if (h&3) { 107 ff_put_dirac_pixels32_c(dst, src, stride, h); 108 } else { 109 ff_put_pixels16_sse2(dst , src[0] , stride, h); 110 ff_put_pixels16_sse2(dst+16, src[0]+16, stride, h); 111 } 112} 113void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h) 114{ 115 if (h&3) { 116 ff_avg_dirac_pixels32_c(dst, src, stride, h); 117 } else { 118 ff_avg_pixels16_sse2(dst , src[0] , stride, h); 119 ff_avg_pixels16_sse2(dst+16, src[0]+16, stride, h); 120 } 121} 122 123void ff_diracdsp_init_mmx(DiracDSPContext* c) 124{ 125 int mm_flags = av_get_cpu_flags(); 126 127 if (EXTERNAL_MMX(mm_flags)) { 128 c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx; 129#if !ARCH_X86_64 130 c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx; 131 c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx; 132 c->dirac_hpel_filter = dirac_hpel_filter_mmx; 133 c->add_rect_clamped = ff_add_rect_clamped_mmx; 134 c->put_signed_rect_clamped = ff_put_signed_rect_clamped_mmx; 135#endif 136 PIXFUNC(put, 0, mmx); 137 PIXFUNC(avg, 0, mmx); 138 } 139 140 if (EXTERNAL_MMXEXT(mm_flags)) { 141 PIXFUNC(avg, 0, mmxext); 142 } 143 144 if (EXTERNAL_SSE2(mm_flags)) { 145 c->dirac_hpel_filter = dirac_hpel_filter_sse2; 146 c->add_rect_clamped = ff_add_rect_clamped_sse2; 147 c->put_signed_rect_clamped = ff_put_signed_rect_clamped_sse2; 148 149 c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2; 150 c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2; 151 152 c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2; 153 c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2; 154 c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2; 155 c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2; 156 } 157} 158