1/* 2 * MMX optimized discrete wavelet transform 3 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 4 * Copyright (c) 2010 David Conrad 5 * 6 * This file is part of FFmpeg. 7 * 8 * FFmpeg is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Lesser General Public 10 * License as published by the Free Software Foundation; either 11 * version 2.1 of the License, or (at your option) any later version. 12 * 13 * FFmpeg is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Lesser General Public License for more details. 17 * 18 * You should have received a copy of the GNU Lesser General Public 19 * License along with FFmpeg; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 21 */ 22 23#include "libavutil/x86/asm.h" 24#include "dsputil_x86.h" 25#include "dirac_dwt.h" 26 27#define COMPOSE_VERTICAL(ext, align) \ 28void ff_vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \ 29void ff_vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \ 30void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \ 31void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \ 32void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \ 33void ff_horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\ 34void ff_horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\ 35\ 36static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \ 37{ \ 38 int i, width_align = width&~(align-1); \ 39\ 40 for(i=width_align; i<width; i++) \ 41 b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \ 42\ 43 ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \ 44} \ 45\ 46static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \ 47{ \ 48 int i, width_align = width&~(align-1); \ 49\ 50 for(i=width_align; i<width; i++) \ 51 b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \ 52\ 53 ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \ 54} \ 55\ 56static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \ 57 IDWTELEM *b3, IDWTELEM *b4, int width) \ 58{ \ 59 int i, width_align = width&~(align-1); \ 60\ 61 for(i=width_align; i<width; i++) \ 62 b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \ 63\ 64 ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \ 65} \ 66\ 67static void vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \ 68 IDWTELEM *b3, IDWTELEM *b4, int width) \ 69{ \ 70 int i, width_align = width&~(align-1); \ 71\ 72 for(i=width_align; i<width; i++) \ 73 b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \ 74\ 75 ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \ 76} \ 77static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \ 78{ \ 79 int i, width_align = width&~(align-1); \ 80\ 81 for(i=width_align; i<width; i++) { \ 82 b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \ 83 b1[i] = COMPOSE_HAARiH0(b1[i], b0[i]); \ 84 } \ 85\ 86 ff_vertical_compose_haar##ext(b0, b1, width_align); \ 87} \ 88static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\ 89{\ 90 int w2= w>>1;\ 91 int x= w2 - (w2&(align-1));\ 92 ff_horizontal_compose_haar0i##ext(b, tmp, w);\ 93\ 94 for (; x < w2; x++) {\ 95 b[2*x ] = tmp[x];\ 96 b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);\ 97 }\ 98}\ 99static void horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\ 100{\ 101 int w2= w>>1;\ 102 int x= w2 - (w2&(align-1));\ 103 ff_horizontal_compose_haar1i##ext(b, tmp, w);\ 104\ 105 for (; x < w2; x++) {\ 106 b[2*x ] = (tmp[x] + 1)>>1;\ 107 b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1)>>1;\ 108 }\ 109}\ 110\ 111 112#if HAVE_YASM 113#if !ARCH_X86_64 114COMPOSE_VERTICAL(_mmx, 4) 115#endif 116COMPOSE_VERTICAL(_sse2, 8) 117 118 119void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w); 120 121static void horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w) 122{ 123 int w2= w>>1; 124 int x= w2 - (w2&7); 125 ff_horizontal_compose_dd97i_ssse3(b, tmp, w); 126 127 for (; x < w2; x++) { 128 b[2*x ] = (tmp[x] + 1)>>1; 129 b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1; 130 } 131} 132#endif 133 134void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type) 135{ 136#if HAVE_YASM 137 int mm_flags = av_get_cpu_flags(); 138 139#if !ARCH_X86_64 140 if (!(mm_flags & AV_CPU_FLAG_MMX)) 141 return; 142 143 switch (type) { 144 case DWT_DIRAC_DD9_7: 145 d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx; 146 d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx; 147 break; 148 case DWT_DIRAC_LEGALL5_3: 149 d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx; 150 d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_mmx; 151 break; 152 case DWT_DIRAC_DD13_7: 153 d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_mmx; 154 d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx; 155 break; 156 case DWT_DIRAC_HAAR0: 157 d->vertical_compose = (void*)vertical_compose_haar_mmx; 158 d->horizontal_compose = horizontal_compose_haar0i_mmx; 159 break; 160 case DWT_DIRAC_HAAR1: 161 d->vertical_compose = (void*)vertical_compose_haar_mmx; 162 d->horizontal_compose = horizontal_compose_haar1i_mmx; 163 break; 164 } 165#endif 166 167 if (!(mm_flags & AV_CPU_FLAG_SSE2)) 168 return; 169 170 switch (type) { 171 case DWT_DIRAC_DD9_7: 172 d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2; 173 d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2; 174 break; 175 case DWT_DIRAC_LEGALL5_3: 176 d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2; 177 d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_sse2; 178 break; 179 case DWT_DIRAC_DD13_7: 180 d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_sse2; 181 d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2; 182 break; 183 case DWT_DIRAC_HAAR0: 184 d->vertical_compose = (void*)vertical_compose_haar_sse2; 185 d->horizontal_compose = horizontal_compose_haar0i_sse2; 186 break; 187 case DWT_DIRAC_HAAR1: 188 d->vertical_compose = (void*)vertical_compose_haar_sse2; 189 d->horizontal_compose = horizontal_compose_haar1i_sse2; 190 break; 191 } 192 193 if (!(mm_flags & AV_CPU_FLAG_SSSE3)) 194 return; 195 196 switch (type) { 197 case DWT_DIRAC_DD9_7: 198 d->horizontal_compose = horizontal_compose_dd97i_ssse3; 199 break; 200 } 201#endif // HAVE_YASM 202} 203