1/* 2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder 3 * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at> 4 * 5 * This file is part of Libav. 6 * 7 * Libav is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * Libav is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with Libav; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * H.264 / AVC / MPEG4 part10 DSP functions. 25 * @author Michael Niedermayer <michaelni@gmx.at> 26 */ 27 28#include "bit_depth_template.c" 29 30#define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom ) 31#define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) 32#define H264_WEIGHT(W) \ 33static void FUNCC(weight_h264_pixels ## W)(uint8_t *_block, int stride, int height, \ 34 int log2_denom, int weight, int offset) \ 35{ \ 36 int y; \ 37 pixel *block = (pixel*)_block; \ 38 stride /= sizeof(pixel); \ 39 offset <<= (log2_denom + (BIT_DEPTH-8)); \ 40 if(log2_denom) offset += 1<<(log2_denom-1); \ 41 for (y = 0; y < height; y++, block += stride) { \ 42 op_scale1(0); \ 43 op_scale1(1); \ 44 if(W==2) continue; \ 45 op_scale1(2); \ 46 op_scale1(3); \ 47 if(W==4) continue; \ 48 op_scale1(4); \ 49 op_scale1(5); \ 50 op_scale1(6); \ 51 op_scale1(7); \ 52 if(W==8) continue; \ 53 op_scale1(8); \ 54 op_scale1(9); \ 55 op_scale1(10); \ 56 op_scale1(11); \ 57 op_scale1(12); \ 58 op_scale1(13); \ 59 op_scale1(14); \ 60 op_scale1(15); \ 61 } \ 62} \ 63static void FUNCC(biweight_h264_pixels ## W)(uint8_t *_dst, uint8_t *_src, int stride, int height, \ 64 int log2_denom, int weightd, int weights, int offset) \ 65{ \ 66 int y; \ 67 pixel *dst = (pixel*)_dst; \ 68 pixel *src = (pixel*)_src; \ 69 stride /= sizeof(pixel); \ 70 offset <<= (BIT_DEPTH-8); \ 71 offset = ((offset + 1) | 1) << log2_denom; \ 72 for (y = 0; y < height; y++, dst += stride, src += stride) { \ 73 op_scale2(0); \ 74 op_scale2(1); \ 75 if(W==2) continue; \ 76 op_scale2(2); \ 77 op_scale2(3); \ 78 if(W==4) continue; \ 79 op_scale2(4); \ 80 op_scale2(5); \ 81 op_scale2(6); \ 82 op_scale2(7); \ 83 if(W==8) continue; \ 84 op_scale2(8); \ 85 op_scale2(9); \ 86 op_scale2(10); \ 87 op_scale2(11); \ 88 op_scale2(12); \ 89 op_scale2(13); \ 90 op_scale2(14); \ 91 op_scale2(15); \ 92 } \ 93} 94 95H264_WEIGHT(16) 96H264_WEIGHT(8) 97H264_WEIGHT(4) 98H264_WEIGHT(2) 99 100#undef op_scale1 101#undef op_scale2 102#undef H264_WEIGHT 103 104static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0) 105{ 106 pixel *pix = (pixel*)_pix; 107 int i, d; 108 xstride /= sizeof(pixel); 109 ystride /= sizeof(pixel); 110 alpha <<= BIT_DEPTH - 8; 111 beta <<= BIT_DEPTH - 8; 112 for( i = 0; i < 4; i++ ) { 113 const int tc_orig = tc0[i] << (BIT_DEPTH - 8); 114 if( tc_orig < 0 ) { 115 pix += inner_iters*ystride; 116 continue; 117 } 118 for( d = 0; d < inner_iters; d++ ) { 119 const int p0 = pix[-1*xstride]; 120 const int p1 = pix[-2*xstride]; 121 const int p2 = pix[-3*xstride]; 122 const int q0 = pix[0]; 123 const int q1 = pix[1*xstride]; 124 const int q2 = pix[2*xstride]; 125 126 if( FFABS( p0 - q0 ) < alpha && 127 FFABS( p1 - p0 ) < beta && 128 FFABS( q1 - q0 ) < beta ) { 129 130 int tc = tc_orig; 131 int i_delta; 132 133 if( FFABS( p2 - p0 ) < beta ) { 134 if(tc_orig) 135 pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc_orig, tc_orig ); 136 tc++; 137 } 138 if( FFABS( q2 - q0 ) < beta ) { 139 if(tc_orig) 140 pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc_orig, tc_orig ); 141 tc++; 142 } 143 144 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); 145 pix[-xstride] = av_clip_pixel( p0 + i_delta ); /* p0' */ 146 pix[0] = av_clip_pixel( q0 - i_delta ); /* q0' */ 147 } 148 pix += ystride; 149 } 150 } 151} 152static void FUNCC(h264_v_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) 153{ 154 FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0); 155} 156static void FUNCC(h264_h_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) 157{ 158 FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); 159} 160static void FUNCC(h264_h_loop_filter_luma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) 161{ 162 FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); 163} 164 165static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta) 166{ 167 pixel *pix = (pixel*)_pix; 168 int d; 169 xstride /= sizeof(pixel); 170 ystride /= sizeof(pixel); 171 alpha <<= BIT_DEPTH - 8; 172 beta <<= BIT_DEPTH - 8; 173 for( d = 0; d < 4 * inner_iters; d++ ) { 174 const int p2 = pix[-3*xstride]; 175 const int p1 = pix[-2*xstride]; 176 const int p0 = pix[-1*xstride]; 177 178 const int q0 = pix[ 0*xstride]; 179 const int q1 = pix[ 1*xstride]; 180 const int q2 = pix[ 2*xstride]; 181 182 if( FFABS( p0 - q0 ) < alpha && 183 FFABS( p1 - p0 ) < beta && 184 FFABS( q1 - q0 ) < beta ) { 185 186 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ 187 if( FFABS( p2 - p0 ) < beta) 188 { 189 const int p3 = pix[-4*xstride]; 190 /* p0', p1', p2' */ 191 pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; 192 pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; 193 pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; 194 } else { 195 /* p0' */ 196 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; 197 } 198 if( FFABS( q2 - q0 ) < beta) 199 { 200 const int q3 = pix[3*xstride]; 201 /* q0', q1', q2' */ 202 pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; 203 pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; 204 pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; 205 } else { 206 /* q0' */ 207 pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; 208 } 209 }else{ 210 /* p0', q0' */ 211 pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; 212 pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2; 213 } 214 } 215 pix += ystride; 216 } 217} 218static void FUNCC(h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta) 219{ 220 FUNCC(h264_loop_filter_luma_intra)(pix, stride, sizeof(pixel), 4, alpha, beta); 221} 222static void FUNCC(h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta) 223{ 224 FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); 225} 226static void FUNCC(h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta) 227{ 228 FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); 229} 230 231static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0) 232{ 233 pixel *pix = (pixel*)_pix; 234 int i, d; 235 xstride /= sizeof(pixel); 236 ystride /= sizeof(pixel); 237 alpha <<= BIT_DEPTH - 8; 238 beta <<= BIT_DEPTH - 8; 239 for( i = 0; i < 4; i++ ) { 240 const int tc = ((tc0[i] - 1) << (BIT_DEPTH - 8)) + 1; 241 if( tc <= 0 ) { 242 pix += inner_iters*ystride; 243 continue; 244 } 245 for( d = 0; d < inner_iters; d++ ) { 246 const int p0 = pix[-1*xstride]; 247 const int p1 = pix[-2*xstride]; 248 const int q0 = pix[0]; 249 const int q1 = pix[1*xstride]; 250 251 if( FFABS( p0 - q0 ) < alpha && 252 FFABS( p1 - p0 ) < beta && 253 FFABS( q1 - q0 ) < beta ) { 254 255 int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); 256 257 pix[-xstride] = av_clip_pixel( p0 + delta ); /* p0' */ 258 pix[0] = av_clip_pixel( q0 - delta ); /* q0' */ 259 } 260 pix += ystride; 261 } 262 } 263} 264static void FUNCC(h264_v_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) 265{ 266 FUNCC(h264_loop_filter_chroma)(pix, stride, sizeof(pixel), 2, alpha, beta, tc0); 267} 268static void FUNCC(h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) 269{ 270 FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); 271} 272static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) 273{ 274 FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0); 275} 276static void FUNCC(h264_h_loop_filter_chroma422)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) 277{ 278 FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); 279} 280static void FUNCC(h264_h_loop_filter_chroma422_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) 281{ 282 FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); 283} 284 285static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta) 286{ 287 pixel *pix = (pixel*)_pix; 288 int d; 289 xstride /= sizeof(pixel); 290 ystride /= sizeof(pixel); 291 alpha <<= BIT_DEPTH - 8; 292 beta <<= BIT_DEPTH - 8; 293 for( d = 0; d < 4 * inner_iters; d++ ) { 294 const int p0 = pix[-1*xstride]; 295 const int p1 = pix[-2*xstride]; 296 const int q0 = pix[0]; 297 const int q1 = pix[1*xstride]; 298 299 if( FFABS( p0 - q0 ) < alpha && 300 FFABS( p1 - p0 ) < beta && 301 FFABS( q1 - q0 ) < beta ) { 302 303 pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ 304 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ 305 } 306 pix += ystride; 307 } 308} 309static void FUNCC(h264_v_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta) 310{ 311 FUNCC(h264_loop_filter_chroma_intra)(pix, stride, sizeof(pixel), 2, alpha, beta); 312} 313static void FUNCC(h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta) 314{ 315 FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); 316} 317static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta) 318{ 319 FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta); 320} 321static void FUNCC(h264_h_loop_filter_chroma422_intra)(uint8_t *pix, int stride, int alpha, int beta) 322{ 323 FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); 324} 325static void FUNCC(h264_h_loop_filter_chroma422_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta) 326{ 327 FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); 328} 329