1/* 2 * H.26L/H.264/AVC/JVT/14496-10/... loop filter 3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * H.264 / AVC / MPEG4 part10 loop filter. 25 * @author Michael Niedermayer <michaelni@gmx.at> 26 */ 27 28#include "libavutil/intreadwrite.h" 29#include "internal.h" 30#include "dsputil.h" 31#include "avcodec.h" 32#include "mpegvideo.h" 33#include "h264.h" 34#include "mathops.h" 35#include "rectangle.h" 36 37//#undef NDEBUG 38#include <assert.h> 39 40/* Deblocking filter (p153) */ 41static const uint8_t alpha_table[52*3] = { 42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47 0, 0, 0, 0, 0, 0, 4, 4, 5, 6, 48 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, 49 25, 28, 32, 36, 40, 45, 50, 56, 63, 71, 50 80, 90,101,113,127,144,162,182,203,226, 51 255,255, 52 255,255,255,255,255,255,255,255,255,255,255,255,255, 53 255,255,255,255,255,255,255,255,255,255,255,255,255, 54 255,255,255,255,255,255,255,255,255,255,255,255,255, 55 255,255,255,255,255,255,255,255,255,255,255,255,255, 56}; 57static const uint8_t beta_table[52*3] = { 58 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 61 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63 0, 0, 0, 0, 0, 0, 2, 2, 2, 3, 64 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, 65 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 66 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 67 18, 18, 68 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 69 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 70 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 71 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 72}; 73static const uint8_t tc0_table[52*3][4] = { 74 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 75 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 76 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 77 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 78 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 79 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 80 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 81 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 82 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 83 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 84 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, 85 {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 }, 86 {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 }, 87 {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, 88 {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 }, 89 {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 }, 90 {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 }, 91 {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 }, 92 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 93 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 94 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 95 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 96 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 97 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 98 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 99 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 100 {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, 101}; 102 103static void av_always_inline filter_mb_edgev( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h) { 104 const unsigned int index_a = qp + h->slice_alpha_c0_offset; 105 const int alpha = alpha_table[index_a]; 106 const int beta = beta_table[qp + h->slice_beta_offset]; 107 if (alpha ==0 || beta == 0) return; 108 109 if( bS[0] < 4 ) { 110 int8_t tc[4]; 111 tc[0] = tc0_table[index_a][bS[0]]; 112 tc[1] = tc0_table[index_a][bS[1]]; 113 tc[2] = tc0_table[index_a][bS[2]]; 114 tc[3] = tc0_table[index_a][bS[3]]; 115 h->h264dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); 116 } else { 117 h->h264dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta); 118 } 119} 120static void av_always_inline filter_mb_edgecv( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { 121 const unsigned int index_a = qp + h->slice_alpha_c0_offset; 122 const int alpha = alpha_table[index_a]; 123 const int beta = beta_table[qp + h->slice_beta_offset]; 124 if (alpha ==0 || beta == 0) return; 125 126 if( bS[0] < 4 ) { 127 int8_t tc[4]; 128 tc[0] = tc0_table[index_a][bS[0]]+1; 129 tc[1] = tc0_table[index_a][bS[1]]+1; 130 tc[2] = tc0_table[index_a][bS[2]]+1; 131 tc[3] = tc0_table[index_a][bS[3]]+1; 132 h->h264dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); 133 } else { 134 h->h264dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); 135 } 136} 137 138static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) { 139 int i; 140 int index_a = qp + h->slice_alpha_c0_offset; 141 int alpha = alpha_table[index_a]; 142 int beta = beta_table[qp + h->slice_beta_offset]; 143 for( i = 0; i < 8; i++, pix += stride) { 144 const int bS_index = (i >> 1) * bsi; 145 146 if( bS[bS_index] == 0 ) { 147 continue; 148 } 149 150 if( bS[bS_index] < 4 ) { 151 const int tc0 = tc0_table[index_a][bS[bS_index]]; 152 const int p0 = pix[-1]; 153 const int p1 = pix[-2]; 154 const int p2 = pix[-3]; 155 const int q0 = pix[0]; 156 const int q1 = pix[1]; 157 const int q2 = pix[2]; 158 159 if( FFABS( p0 - q0 ) < alpha && 160 FFABS( p1 - p0 ) < beta && 161 FFABS( q1 - q0 ) < beta ) { 162 int tc = tc0; 163 int i_delta; 164 165 if( FFABS( p2 - p0 ) < beta ) { 166 if(tc0) 167 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 ); 168 tc++; 169 } 170 if( FFABS( q2 - q0 ) < beta ) { 171 if(tc0) 172 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 ); 173 tc++; 174 } 175 176 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); 177 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */ 178 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ 179 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); 180 } 181 }else{ 182 const int p0 = pix[-1]; 183 const int p1 = pix[-2]; 184 const int p2 = pix[-3]; 185 186 const int q0 = pix[0]; 187 const int q1 = pix[1]; 188 const int q2 = pix[2]; 189 190 if( FFABS( p0 - q0 ) < alpha && 191 FFABS( p1 - p0 ) < beta && 192 FFABS( q1 - q0 ) < beta ) { 193 194 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ 195 if( FFABS( p2 - p0 ) < beta) 196 { 197 const int p3 = pix[-4]; 198 /* p0', p1', p2' */ 199 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; 200 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; 201 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; 202 } else { 203 /* p0' */ 204 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; 205 } 206 if( FFABS( q2 - q0 ) < beta) 207 { 208 const int q3 = pix[3]; 209 /* q0', q1', q2' */ 210 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; 211 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; 212 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; 213 } else { 214 /* q0' */ 215 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; 216 } 217 }else{ 218 /* p0', q0' */ 219 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; 220 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; 221 } 222 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); 223 } 224 } 225 } 226} 227static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int bsi, int qp ) { 228 int i; 229 int index_a = qp + h->slice_alpha_c0_offset; 230 int alpha = alpha_table[index_a]; 231 int beta = beta_table[qp + h->slice_beta_offset]; 232 for( i = 0; i < 4; i++, pix += stride) { 233 const int bS_index = i*bsi; 234 235 if( bS[bS_index] == 0 ) { 236 continue; 237 } 238 239 if( bS[bS_index] < 4 ) { 240 const int tc = tc0_table[index_a][bS[bS_index]] + 1; 241 const int p0 = pix[-1]; 242 const int p1 = pix[-2]; 243 const int q0 = pix[0]; 244 const int q1 = pix[1]; 245 246 if( FFABS( p0 - q0 ) < alpha && 247 FFABS( p1 - p0 ) < beta && 248 FFABS( q1 - q0 ) < beta ) { 249 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); 250 251 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */ 252 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */ 253 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); 254 } 255 }else{ 256 const int p0 = pix[-1]; 257 const int p1 = pix[-2]; 258 const int q0 = pix[0]; 259 const int q1 = pix[1]; 260 261 if( FFABS( p0 - q0 ) < alpha && 262 FFABS( p1 - p0 ) < beta && 263 FFABS( q1 - q0 ) < beta ) { 264 265 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ 266 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ 267 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); 268 } 269 } 270 } 271} 272 273static void av_always_inline filter_mb_edgeh( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { 274 const unsigned int index_a = qp + h->slice_alpha_c0_offset; 275 const int alpha = alpha_table[index_a]; 276 const int beta = beta_table[qp + h->slice_beta_offset]; 277 if (alpha ==0 || beta == 0) return; 278 279 if( bS[0] < 4 ) { 280 int8_t tc[4]; 281 tc[0] = tc0_table[index_a][bS[0]]; 282 tc[1] = tc0_table[index_a][bS[1]]; 283 tc[2] = tc0_table[index_a][bS[2]]; 284 tc[3] = tc0_table[index_a][bS[3]]; 285 h->h264dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); 286 } else { 287 h->h264dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta); 288 } 289} 290 291static void av_always_inline filter_mb_edgech( uint8_t *pix, int stride, int16_t bS[4], unsigned int qp, H264Context *h ) { 292 const unsigned int index_a = qp + h->slice_alpha_c0_offset; 293 const int alpha = alpha_table[index_a]; 294 const int beta = beta_table[qp + h->slice_beta_offset]; 295 if (alpha ==0 || beta == 0) return; 296 297 if( bS[0] < 4 ) { 298 int8_t tc[4]; 299 tc[0] = tc0_table[index_a][bS[0]]+1; 300 tc[1] = tc0_table[index_a][bS[1]]+1; 301 tc[2] = tc0_table[index_a][bS[2]]+1; 302 tc[3] = tc0_table[index_a][bS[3]]+1; 303 h->h264dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); 304 } else { 305 h->h264dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); 306 } 307} 308 309void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { 310 MpegEncContext * const s = &h->s; 311 int mb_xy; 312 int mb_type, left_type; 313 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh; 314 315 mb_xy = h->mb_xy; 316 317 if(!h->top_type || !h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) { 318 ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); 319 return; 320 } 321 assert(!FRAME_MBAFF); 322 left_type= h->left_type[0]; 323 324 mb_type = s->current_picture.mb_type[mb_xy]; 325 qp = s->current_picture.qscale_table[mb_xy]; 326 qp0 = s->current_picture.qscale_table[mb_xy-1]; 327 qp1 = s->current_picture.qscale_table[h->top_mb_xy]; 328 qpc = get_chroma_qp( h, 0, qp ); 329 qpc0 = get_chroma_qp( h, 0, qp0 ); 330 qpc1 = get_chroma_qp( h, 0, qp1 ); 331 qp0 = (qp + qp0 + 1) >> 1; 332 qp1 = (qp + qp1 + 1) >> 1; 333 qpc0 = (qpc + qpc0 + 1) >> 1; 334 qpc1 = (qpc + qpc1 + 1) >> 1; 335 qp_thresh = 15+52 - h->slice_alpha_c0_offset; 336 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh && 337 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh) 338 return; 339 340 if( IS_INTRA(mb_type) ) { 341 int16_t bS4[4] = {4,4,4,4}; 342 int16_t bS3[4] = {3,3,3,3}; 343 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4; 344 if(left_type) 345 filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h); 346 if( IS_8x8DCT(mb_type) ) { 347 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); 348 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); 349 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); 350 } else { 351 filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h); 352 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); 353 filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h); 354 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); 355 filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h); 356 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); 357 filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h); 358 } 359 if(left_type){ 360 filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h); 361 filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h); 362 } 363 filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h); 364 filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h); 365 filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); 366 filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h); 367 filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); 368 filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h); 369 return; 370 } else { 371 LOCAL_ALIGNED_8(int16_t, bS, [2], [4][4]); 372 int edges; 373 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) { 374 edges = 4; 375 AV_WN64A(bS[0][0], 0x0002000200020002ULL); 376 AV_WN64A(bS[0][2], 0x0002000200020002ULL); 377 AV_WN64A(bS[1][0], 0x0002000200020002ULL); 378 AV_WN64A(bS[1][2], 0x0002000200020002ULL); 379 } else { 380 int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 1 : 0; 381 int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0; 382 int step = 1+(mb_type>>24); //IS_8x8DCT(mb_type) ? 2 : 1; 383 edges = 4 - 3*((mb_type>>3) & !(h->cbp & 15)); //(mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4; 384 h->h264dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, 385 h->list_count==2, edges, step, mask_edge0, mask_edge1, FIELD_PICTURE); 386 } 387 if( IS_INTRA(left_type) ) 388 AV_WN64A(bS[0][0], 0x0004000400040004ULL); 389 if( IS_INTRA(h->top_type) ) 390 AV_WN64A(bS[1][0], FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL); 391 392#define FILTER(hv,dir,edge)\ 393 if(AV_RN64A(bS[dir][edge])) { \ 394 filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\ 395 if(!(edge&1)) {\ 396 filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ 397 filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ 398 }\ 399 } 400 if(left_type) 401 FILTER(v,0,0); 402 if( edges == 1 ) { 403 FILTER(h,1,0); 404 } else if( IS_8x8DCT(mb_type) ) { 405 FILTER(v,0,2); 406 FILTER(h,1,0); 407 FILTER(h,1,2); 408 } else { 409 FILTER(v,0,1); 410 FILTER(v,0,2); 411 FILTER(v,0,3); 412 FILTER(h,1,0); 413 FILTER(h,1,1); 414 FILTER(h,1,2); 415 FILTER(h,1,3); 416 } 417#undef FILTER 418 } 419} 420 421static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){ 422 int v; 423 424 v= h->ref_cache[0][b_idx] != h->ref_cache[0][bn_idx]; 425 if(!v && h->ref_cache[0][b_idx]!=-1) 426 v= h->mv_cache[0][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U | 427 FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit; 428 429 if(h->list_count==2){ 430 if(!v) 431 v = h->ref_cache[1][b_idx] != h->ref_cache[1][bn_idx] | 432 h->mv_cache[1][b_idx][0] - h->mv_cache[1][bn_idx][0] + 3 >= 7U | 433 FFABS( h->mv_cache[1][b_idx][1] - h->mv_cache[1][bn_idx][1] ) >= mvy_limit; 434 435 if(v){ 436 if(h->ref_cache[0][b_idx] != h->ref_cache[1][bn_idx] | 437 h->ref_cache[1][b_idx] != h->ref_cache[0][bn_idx]) 438 return 1; 439 return 440 h->mv_cache[0][b_idx][0] - h->mv_cache[1][bn_idx][0] + 3 >= 7U | 441 FFABS( h->mv_cache[0][b_idx][1] - h->mv_cache[1][bn_idx][1] ) >= mvy_limit | 442 h->mv_cache[1][b_idx][0] - h->mv_cache[0][bn_idx][0] + 3 >= 7U | 443 FFABS( h->mv_cache[1][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= mvy_limit; 444 } 445 } 446 447 return v; 448} 449 450static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) { 451 MpegEncContext * const s = &h->s; 452 int edge; 453 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; 454 const int mbm_type = dir == 0 ? h->left_type[0] : h->top_type; 455 456 // how often to recheck mv-based bS when iterating between edges 457 static const uint8_t mask_edge_tab[2][8]={{0,3,3,3,1,1,1,1}, 458 {0,3,1,1,3,3,3,3}}; 459 const int mask_edge = mask_edge_tab[dir][(mb_type>>3)&7]; 460 const int edges = mask_edge== 3 && !(h->cbp&15) ? 1 : 4; 461 462 // how often to recheck mv-based bS when iterating along each edge 463 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)); 464 465 if(mbm_type && !first_vertical_edge_done){ 466 467 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) 468 && IS_INTERLACED(mbm_type&~mb_type) 469 ) { 470 // This is a special case in the norm where the filtering must 471 // be done twice (one each of the field) even if we are in a 472 // frame macroblock. 473 // 474 unsigned int tmp_linesize = 2 * linesize; 475 unsigned int tmp_uvlinesize = 2 * uvlinesize; 476 int mbn_xy = mb_xy - 2 * s->mb_stride; 477 int j; 478 479 for(j=0; j<2; j++, mbn_xy += s->mb_stride){ 480 DECLARE_ALIGNED(8, int16_t, bS)[4]; 481 int qp; 482 if( IS_INTRA(mb_type|s->current_picture.mb_type[mbn_xy]) ) { 483 AV_WN64A(bS, 0x0003000300030003ULL); 484 } else { 485 if(!CABAC && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])){ 486 bS[0]= 1+((h->cbp_table[mbn_xy] & 4)||h->non_zero_count_cache[scan8[0]+0]); 487 bS[1]= 1+((h->cbp_table[mbn_xy] & 4)||h->non_zero_count_cache[scan8[0]+1]); 488 bS[2]= 1+((h->cbp_table[mbn_xy] & 8)||h->non_zero_count_cache[scan8[0]+2]); 489 bS[3]= 1+((h->cbp_table[mbn_xy] & 8)||h->non_zero_count_cache[scan8[0]+3]); 490 }else{ 491 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy] + 4+3*8; 492 int i; 493 for( i = 0; i < 4; i++ ) { 494 bS[i] = 1 + !!(h->non_zero_count_cache[scan8[0]+i] | mbn_nnz[i]); 495 } 496 } 497 } 498 // Do not use s->qscale as luma quantizer because it has not the same 499 // value in IPCM macroblocks. 500 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; 501 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize); 502 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } 503 filter_mb_edgeh( &img_y[j*linesize], tmp_linesize, bS, qp, h ); 504 filter_mb_edgech( &img_cb[j*uvlinesize], tmp_uvlinesize, bS, 505 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h); 506 filter_mb_edgech( &img_cr[j*uvlinesize], tmp_uvlinesize, bS, 507 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1, h); 508 } 509 }else{ 510 DECLARE_ALIGNED(8, int16_t, bS)[4]; 511 int qp; 512 513 if( IS_INTRA(mb_type|mbm_type)) { 514 AV_WN64A(bS, 0x0003000300030003ULL); 515 if ( (!IS_INTERLACED(mb_type|mbm_type)) 516 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0)) 517 ) 518 AV_WN64A(bS, 0x0004000400040004ULL); 519 } else { 520 int i; 521 int mv_done; 522 523 if( dir && FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbm_type)) { 524 AV_WN64A(bS, 0x0001000100010001ULL); 525 mv_done = 1; 526 } 527 else if( mask_par0 && ((mbm_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) { 528 int b_idx= 8 + 4; 529 int bn_idx= b_idx - (dir ? 8:1); 530 531 bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, 8 + 4, bn_idx, mvy_limit); 532 mv_done = 1; 533 } 534 else 535 mv_done = 0; 536 537 for( i = 0; i < 4; i++ ) { 538 int x = dir == 0 ? 0 : i; 539 int y = dir == 0 ? i : 0; 540 int b_idx= 8 + 4 + x + 8*y; 541 int bn_idx= b_idx - (dir ? 8:1); 542 543 if( h->non_zero_count_cache[b_idx] | 544 h->non_zero_count_cache[bn_idx] ) { 545 bS[i] = 2; 546 } 547 else if(!mv_done) 548 { 549 bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit); 550 } 551 } 552 } 553 554 /* Filter edge */ 555 // Do not use s->qscale as luma quantizer because it has not the same 556 // value in IPCM macroblocks. 557 if(bS[0]+bS[1]+bS[2]+bS[3]){ 558 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbm_xy] + 1 ) >> 1; 559 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]); 560 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); 561 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } 562 if( dir == 0 ) { 563 filter_mb_edgev( &img_y[0], linesize, bS, qp, h ); 564 { 565 int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; 566 filter_mb_edgecv( &img_cb[0], uvlinesize, bS, qp, h); 567 if(h->pps.chroma_qp_diff) 568 qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; 569 filter_mb_edgecv( &img_cr[0], uvlinesize, bS, qp, h); 570 } 571 } else { 572 filter_mb_edgeh( &img_y[0], linesize, bS, qp, h ); 573 { 574 int qp= ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; 575 filter_mb_edgech( &img_cb[0], uvlinesize, bS, qp, h); 576 if(h->pps.chroma_qp_diff) 577 qp= ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbm_xy] ) + 1 ) >> 1; 578 filter_mb_edgech( &img_cr[0], uvlinesize, bS, qp, h); 579 } 580 } 581 } 582 } 583 } 584 585 /* Calculate bS */ 586 for( edge = 1; edge < edges; edge++ ) { 587 DECLARE_ALIGNED(8, int16_t, bS)[4]; 588 int qp; 589 590 if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type) 591 continue; 592 593 if( IS_INTRA(mb_type)) { 594 AV_WN64A(bS, 0x0003000300030003ULL); 595 } else { 596 int i; 597 int mv_done; 598 599 if( edge & mask_edge ) { 600 AV_ZERO64(bS); 601 mv_done = 1; 602 } 603 else if( mask_par0 ) { 604 int b_idx= 8 + 4 + edge * (dir ? 8:1); 605 int bn_idx= b_idx - (dir ? 8:1); 606 607 bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, b_idx, bn_idx, mvy_limit); 608 mv_done = 1; 609 } 610 else 611 mv_done = 0; 612 613 for( i = 0; i < 4; i++ ) { 614 int x = dir == 0 ? edge : i; 615 int y = dir == 0 ? i : edge; 616 int b_idx= 8 + 4 + x + 8*y; 617 int bn_idx= b_idx - (dir ? 8:1); 618 619 if( h->non_zero_count_cache[b_idx] | 620 h->non_zero_count_cache[bn_idx] ) { 621 bS[i] = 2; 622 } 623 else if(!mv_done) 624 { 625 bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit); 626 } 627 } 628 629 if(bS[0]+bS[1]+bS[2]+bS[3] == 0) 630 continue; 631 } 632 633 /* Filter edge */ 634 // Do not use s->qscale as luma quantizer because it has not the same 635 // value in IPCM macroblocks. 636 qp = s->current_picture.qscale_table[mb_xy]; 637 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]); 638 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); 639 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } 640 if( dir == 0 ) { 641 filter_mb_edgev( &img_y[4*edge], linesize, bS, qp, h ); 642 if( (edge&1) == 0 ) { 643 filter_mb_edgecv( &img_cb[2*edge], uvlinesize, bS, h->chroma_qp[0], h); 644 filter_mb_edgecv( &img_cr[2*edge], uvlinesize, bS, h->chroma_qp[1], h); 645 } 646 } else { 647 filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h ); 648 if( (edge&1) == 0 ) { 649 filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], h); 650 filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], h); 651 } 652 } 653 } 654} 655 656void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { 657 MpegEncContext * const s = &h->s; 658 const int mb_xy= mb_x + mb_y*s->mb_stride; 659 const int mb_type = s->current_picture.mb_type[mb_xy]; 660 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4; 661 int first_vertical_edge_done = 0; 662 av_unused int dir; 663 664 if (FRAME_MBAFF 665 // and current and left pair do not have the same interlaced type 666 && IS_INTERLACED(mb_type^h->left_type[0]) 667 // and left mb is in available to us 668 && h->left_type[0]) { 669 /* First vertical edge is different in MBAFF frames 670 * There are 8 different bS to compute and 2 different Qp 671 */ 672 DECLARE_ALIGNED(8, int16_t, bS)[8]; 673 int qp[2]; 674 int bqp[2]; 675 int rqp[2]; 676 int mb_qp, mbn0_qp, mbn1_qp; 677 int i; 678 first_vertical_edge_done = 1; 679 680 if( IS_INTRA(mb_type) ) { 681 AV_WN64A(&bS[0], 0x0004000400040004ULL); 682 AV_WN64A(&bS[4], 0x0004000400040004ULL); 683 } else { 684 static const uint8_t offset[2][2][8]={ 685 { 686 {7+8*0, 7+8*0, 7+8*0, 7+8*0, 7+8*1, 7+8*1, 7+8*1, 7+8*1}, 687 {7+8*2, 7+8*2, 7+8*2, 7+8*2, 7+8*3, 7+8*3, 7+8*3, 7+8*3}, 688 },{ 689 {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3}, 690 {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3}, 691 } 692 }; 693 const uint8_t *off= offset[MB_FIELD][mb_y&1]; 694 for( i = 0; i < 8; i++ ) { 695 int j= MB_FIELD ? i>>2 : i&1; 696 int mbn_xy = h->left_mb_xy[j]; 697 int mbn_type= h->left_type[j]; 698 699 if( IS_INTRA( mbn_type ) ) 700 bS[i] = 4; 701 else{ 702 bS[i] = 1 + !!(h->non_zero_count_cache[12+8*(i>>1)] | 703 ((!h->pps.cabac && IS_8x8DCT(mbn_type)) ? 704 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2)) 705 : 706 h->non_zero_count[mbn_xy][ off[i] ])); 707 } 708 } 709 } 710 711 mb_qp = s->current_picture.qscale_table[mb_xy]; 712 mbn0_qp = s->current_picture.qscale_table[h->left_mb_xy[0]]; 713 mbn1_qp = s->current_picture.qscale_table[h->left_mb_xy[1]]; 714 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1; 715 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) + 716 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1; 717 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) + 718 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1; 719 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1; 720 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) + 721 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1; 722 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) + 723 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1; 724 725 /* Filter edge */ 726 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize); 727 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } 728 if(MB_FIELD){ 729 filter_mb_mbaff_edgev ( h, img_y , linesize, bS , 1, qp [0] ); 730 filter_mb_mbaff_edgev ( h, img_y + 8* linesize, linesize, bS+4, 1, qp [1] ); 731 filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0] ); 732 filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1] ); 733 filter_mb_mbaff_edgecv( h, img_cr, uvlinesize, bS , 1, rqp[0] ); 734 filter_mb_mbaff_edgecv( h, img_cr + 4*uvlinesize, uvlinesize, bS+4, 1, rqp[1] ); 735 }else{ 736 filter_mb_mbaff_edgev ( h, img_y , 2* linesize, bS , 2, qp [0] ); 737 filter_mb_mbaff_edgev ( h, img_y + linesize, 2* linesize, bS+1, 2, qp [1] ); 738 filter_mb_mbaff_edgecv( h, img_cb, 2*uvlinesize, bS , 2, bqp[0] ); 739 filter_mb_mbaff_edgecv( h, img_cb + uvlinesize, 2*uvlinesize, bS+1, 2, bqp[1] ); 740 filter_mb_mbaff_edgecv( h, img_cr, 2*uvlinesize, bS , 2, rqp[0] ); 741 filter_mb_mbaff_edgecv( h, img_cr + uvlinesize, 2*uvlinesize, bS+1, 2, rqp[1] ); 742 } 743 } 744 745#if CONFIG_SMALL 746 for( dir = 0; dir < 2; dir++ ) 747 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir); 748#else 749 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0); 750 filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1); 751#endif 752} 753