1/* 2 * SVQ1 Encoder 3 * Copyright (C) 2004 Mike Melanson <melanson@pcisys.net> 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * Sorenson Vector Quantizer #1 (SVQ1) video codec. 25 * For more information of the SVQ1 algorithm, visit: 26 * http://www.pcisys.net/~melanson/codecs/ 27 */ 28 29 30#include "avcodec.h" 31#include "dsputil.h" 32#include "mpegvideo.h" 33#include "h263.h" 34#include "internal.h" 35 36#include "svq1.h" 37#include "svq1enc_cb.h" 38 39#undef NDEBUG 40#include <assert.h> 41 42 43typedef struct SVQ1Context { 44 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) 45 AVCodecContext *avctx; 46 DSPContext dsp; 47 AVFrame picture; 48 AVFrame current_picture; 49 AVFrame last_picture; 50 PutBitContext pb; 51 GetBitContext gb; 52 53 PutBitContext reorder_pb[6]; //why ooh why this sick breadth first order, everything is slower and more complex 54 55 int frame_width; 56 int frame_height; 57 58 /* Y plane block dimensions */ 59 int y_block_width; 60 int y_block_height; 61 62 /* U & V plane (C planes) block dimensions */ 63 int c_block_width; 64 int c_block_height; 65 66 uint16_t *mb_type; 67 uint32_t *dummy; 68 int16_t (*motion_val8[3])[2]; 69 int16_t (*motion_val16[3])[2]; 70 71 int64_t rd_total; 72 73 uint8_t *scratchbuf; 74} SVQ1Context; 75 76static void svq1_write_header(SVQ1Context *s, int frame_type) 77{ 78 int i; 79 80 /* frame code */ 81 put_bits(&s->pb, 22, 0x20); 82 83 /* temporal reference (sure hope this is a "don't care") */ 84 put_bits(&s->pb, 8, 0x00); 85 86 /* frame type */ 87 put_bits(&s->pb, 2, frame_type - 1); 88 89 if (frame_type == FF_I_TYPE) { 90 91 /* no checksum since frame code is 0x20 */ 92 93 /* no embedded string either */ 94 95 /* output 5 unknown bits (2 + 2 + 1) */ 96 put_bits(&s->pb, 5, 2); /* 2 needed by quicktime decoder */ 97 98 i= ff_match_2uint16(ff_svq1_frame_size_table, FF_ARRAY_ELEMS(ff_svq1_frame_size_table), s->frame_width, s->frame_height); 99 put_bits(&s->pb, 3, i); 100 101 if (i == 7) 102 { 103 put_bits(&s->pb, 12, s->frame_width); 104 put_bits(&s->pb, 12, s->frame_height); 105 } 106 } 107 108 /* no checksum or extra data (next 2 bits get 0) */ 109 put_bits(&s->pb, 2, 0); 110} 111 112 113#define QUALITY_THRESHOLD 100 114#define THRESHOLD_MULTIPLIER 0.6 115 116#if HAVE_ALTIVEC 117#undef vector 118#endif 119 120static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra){ 121 int count, y, x, i, j, split, best_mean, best_score, best_count; 122 int best_vector[6]; 123 int block_sum[7]= {0, 0, 0, 0, 0, 0}; 124 int w= 2<<((level+2)>>1); 125 int h= 2<<((level+1)>>1); 126 int size=w*h; 127 int16_t block[7][256]; 128 const int8_t *codebook_sum, *codebook; 129 const uint16_t (*mean_vlc)[2]; 130 const uint8_t (*multistage_vlc)[2]; 131 132 best_score=0; 133 //FIXME optimize, this doenst need to be done multiple times 134 if(intra){ 135 codebook_sum= svq1_intra_codebook_sum[level]; 136 codebook= ff_svq1_intra_codebooks[level]; 137 mean_vlc= ff_svq1_intra_mean_vlc; 138 multistage_vlc= ff_svq1_intra_multistage_vlc[level]; 139 for(y=0; y<h; y++){ 140 for(x=0; x<w; x++){ 141 int v= src[x + y*stride]; 142 block[0][x + w*y]= v; 143 best_score += v*v; 144 block_sum[0] += v; 145 } 146 } 147 }else{ 148 codebook_sum= svq1_inter_codebook_sum[level]; 149 codebook= ff_svq1_inter_codebooks[level]; 150 mean_vlc= ff_svq1_inter_mean_vlc + 256; 151 multistage_vlc= ff_svq1_inter_multistage_vlc[level]; 152 for(y=0; y<h; y++){ 153 for(x=0; x<w; x++){ 154 int v= src[x + y*stride] - ref[x + y*stride]; 155 block[0][x + w*y]= v; 156 best_score += v*v; 157 block_sum[0] += v; 158 } 159 } 160 } 161 162 best_count=0; 163 best_score -= ((block_sum[0]*block_sum[0])>>(level+3)); 164 best_mean= (block_sum[0] + (size>>1)) >> (level+3); 165 166 if(level<4){ 167 for(count=1; count<7; count++){ 168 int best_vector_score= INT_MAX; 169 int best_vector_sum=-999, best_vector_mean=-999; 170 const int stage= count-1; 171 const int8_t *vector; 172 173 for(i=0; i<16; i++){ 174 int sum= codebook_sum[stage*16 + i]; 175 int sqr, diff, score; 176 177 vector = codebook + stage*size*16 + i*size; 178 sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size); 179 diff= block_sum[stage] - sum; 180 score= sqr - ((diff*(int64_t)diff)>>(level+3)); //FIXME 64bit slooow 181 if(score < best_vector_score){ 182 int mean= (diff + (size>>1)) >> (level+3); 183 assert(mean >-300 && mean<300); 184 mean= av_clip(mean, intra?0:-256, 255); 185 best_vector_score= score; 186 best_vector[stage]= i; 187 best_vector_sum= sum; 188 best_vector_mean= mean; 189 } 190 } 191 assert(best_vector_mean != -999); 192 vector= codebook + stage*size*16 + best_vector[stage]*size; 193 for(j=0; j<size; j++){ 194 block[stage+1][j] = block[stage][j] - vector[j]; 195 } 196 block_sum[stage+1]= block_sum[stage] - best_vector_sum; 197 best_vector_score += 198 lambda*(+ 1 + 4*count 199 + multistage_vlc[1+count][1] 200 + mean_vlc[best_vector_mean][1]); 201 202 if(best_vector_score < best_score){ 203 best_score= best_vector_score; 204 best_count= count; 205 best_mean= best_vector_mean; 206 } 207 } 208 } 209 210 split=0; 211 if(best_score > threshold && level){ 212 int score=0; 213 int offset= (level&1) ? stride*h/2 : w/2; 214 PutBitContext backup[6]; 215 216 for(i=level-1; i>=0; i--){ 217 backup[i]= s->reorder_pb[i]; 218 } 219 score += encode_block(s, src , ref , decoded , stride, level-1, threshold>>1, lambda, intra); 220 score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra); 221 score += lambda; 222 223 if(score < best_score){ 224 best_score= score; 225 split=1; 226 }else{ 227 for(i=level-1; i>=0; i--){ 228 s->reorder_pb[i]= backup[i]; 229 } 230 } 231 } 232 if (level > 0) 233 put_bits(&s->reorder_pb[level], 1, split); 234 235 if(!split){ 236 assert((best_mean >= 0 && best_mean<256) || !intra); 237 assert(best_mean >= -256 && best_mean<256); 238 assert(best_count >=0 && best_count<7); 239 assert(level<4 || best_count==0); 240 241 /* output the encoding */ 242 put_bits(&s->reorder_pb[level], 243 multistage_vlc[1 + best_count][1], 244 multistage_vlc[1 + best_count][0]); 245 put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1], 246 mean_vlc[best_mean][0]); 247 248 for (i = 0; i < best_count; i++){ 249 assert(best_vector[i]>=0 && best_vector[i]<16); 250 put_bits(&s->reorder_pb[level], 4, best_vector[i]); 251 } 252 253 for(y=0; y<h; y++){ 254 for(x=0; x<w; x++){ 255 decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean; 256 } 257 } 258 } 259 260 return best_score; 261} 262 263 264static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane, 265 int width, int height, int src_stride, int stride) 266{ 267 int x, y; 268 int i; 269 int block_width, block_height; 270 int level; 271 int threshold[6]; 272 uint8_t *src = s->scratchbuf + stride * 16; 273 const int lambda= (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT); 274 275 /* figure out the acceptable level thresholds in advance */ 276 threshold[5] = QUALITY_THRESHOLD; 277 for (level = 4; level >= 0; level--) 278 threshold[level] = threshold[level + 1] * THRESHOLD_MULTIPLIER; 279 280 block_width = (width + 15) / 16; 281 block_height = (height + 15) / 16; 282 283 if(s->picture.pict_type == FF_P_TYPE){ 284 s->m.avctx= s->avctx; 285 s->m.current_picture_ptr= &s->m.current_picture; 286 s->m.last_picture_ptr = &s->m.last_picture; 287 s->m.last_picture.data[0]= ref_plane; 288 s->m.linesize= 289 s->m.last_picture.linesize[0]= 290 s->m.new_picture.linesize[0]= 291 s->m.current_picture.linesize[0]= stride; 292 s->m.width= width; 293 s->m.height= height; 294 s->m.mb_width= block_width; 295 s->m.mb_height= block_height; 296 s->m.mb_stride= s->m.mb_width+1; 297 s->m.b8_stride= 2*s->m.mb_width+1; 298 s->m.f_code=1; 299 s->m.pict_type= s->picture.pict_type; 300 s->m.me_method= s->avctx->me_method; 301 s->m.me.scene_change_score=0; 302 s->m.flags= s->avctx->flags; 303// s->m.out_format = FMT_H263; 304// s->m.unrestricted_mv= 1; 305 306 s->m.lambda= s->picture.quality; 307 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); 308 s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; 309 310 if(!s->motion_val8[plane]){ 311 s->motion_val8 [plane]= av_mallocz((s->m.b8_stride*block_height*2 + 2)*2*sizeof(int16_t)); 312 s->motion_val16[plane]= av_mallocz((s->m.mb_stride*(block_height + 2) + 1)*2*sizeof(int16_t)); 313 } 314 315 s->m.mb_type= s->mb_type; 316 317 //dummies, to avoid segfaults 318 s->m.current_picture.mb_mean= (uint8_t *)s->dummy; 319 s->m.current_picture.mb_var= (uint16_t*)s->dummy; 320 s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy; 321 s->m.current_picture.mb_type= s->dummy; 322 323 s->m.current_picture.motion_val[0]= s->motion_val8[plane] + 2; 324 s->m.p_mv_table= s->motion_val16[plane] + s->m.mb_stride + 1; 325 s->m.dsp= s->dsp; //move 326 ff_init_me(&s->m); 327 328 s->m.me.dia_size= s->avctx->dia_size; 329 s->m.first_slice_line=1; 330 for (y = 0; y < block_height; y++) { 331 s->m.new_picture.data[0]= src - y*16*stride; //ugly 332 s->m.mb_y= y; 333 334 for(i=0; i<16 && i + 16*y<height; i++){ 335 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width); 336 for(x=width; x<16*block_width; x++) 337 src[i*stride+x]= src[i*stride+x-1]; 338 } 339 for(; i<16 && i + 16*y<16*block_height; i++) 340 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width); 341 342 for (x = 0; x < block_width; x++) { 343 s->m.mb_x= x; 344 ff_init_block_index(&s->m); 345 ff_update_block_index(&s->m); 346 347 ff_estimate_p_frame_motion(&s->m, x, y); 348 } 349 s->m.first_slice_line=0; 350 } 351 352 ff_fix_long_p_mvs(&s->m); 353 ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code, CANDIDATE_MB_TYPE_INTER, 0); 354 } 355 356 s->m.first_slice_line=1; 357 for (y = 0; y < block_height; y++) { 358 for(i=0; i<16 && i + 16*y<height; i++){ 359 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width); 360 for(x=width; x<16*block_width; x++) 361 src[i*stride+x]= src[i*stride+x-1]; 362 } 363 for(; i<16 && i + 16*y<16*block_height; i++) 364 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width); 365 366 s->m.mb_y= y; 367 for (x = 0; x < block_width; x++) { 368 uint8_t reorder_buffer[3][6][7*32]; 369 int count[3][6]; 370 int offset = y * 16 * stride + x * 16; 371 uint8_t *decoded= decoded_plane + offset; 372 uint8_t *ref= ref_plane + offset; 373 int score[4]={0,0,0,0}, best; 374 uint8_t *temp = s->scratchbuf; 375 376 if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){ //FIXME check size 377 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); 378 return -1; 379 } 380 381 s->m.mb_x= x; 382 ff_init_block_index(&s->m); 383 ff_update_block_index(&s->m); 384 385 if(s->picture.pict_type == FF_I_TYPE || (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTRA)){ 386 for(i=0; i<6; i++){ 387 init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32); 388 } 389 if(s->picture.pict_type == FF_P_TYPE){ 390 const uint8_t *vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_INTRA]; 391 put_bits(&s->reorder_pb[5], vlc[1], vlc[0]); 392 score[0]= vlc[1]*lambda; 393 } 394 score[0]+= encode_block(s, src+16*x, NULL, temp, stride, 5, 64, lambda, 1); 395 for(i=0; i<6; i++){ 396 count[0][i]= put_bits_count(&s->reorder_pb[i]); 397 flush_put_bits(&s->reorder_pb[i]); 398 } 399 }else 400 score[0]= INT_MAX; 401 402 best=0; 403 404 if(s->picture.pict_type == FF_P_TYPE){ 405 const uint8_t *vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_INTER]; 406 int mx, my, pred_x, pred_y, dxy; 407 int16_t *motion_ptr; 408 409 motion_ptr= h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y); 410 if(s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER){ 411 for(i=0; i<6; i++) 412 init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32); 413 414 put_bits(&s->reorder_pb[5], vlc[1], vlc[0]); 415 416 s->m.pb= s->reorder_pb[5]; 417 mx= motion_ptr[0]; 418 my= motion_ptr[1]; 419 assert(mx>=-32 && mx<=31); 420 assert(my>=-32 && my<=31); 421 assert(pred_x>=-32 && pred_x<=31); 422 assert(pred_y>=-32 && pred_y<=31); 423 ff_h263_encode_motion(&s->m, mx - pred_x, 1); 424 ff_h263_encode_motion(&s->m, my - pred_y, 1); 425 s->reorder_pb[5]= s->m.pb; 426 score[1] += lambda*put_bits_count(&s->reorder_pb[5]); 427 428 dxy= (mx&1) + 2*(my&1); 429 430 s->dsp.put_pixels_tab[0][dxy](temp+16, ref + (mx>>1) + stride*(my>>1), stride, 16); 431 432 score[1]+= encode_block(s, src+16*x, temp+16, decoded, stride, 5, 64, lambda, 0); 433 best= score[1] <= score[0]; 434 435 vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_SKIP]; 436 score[2]= s->dsp.sse[0](NULL, src+16*x, ref, stride, 16); 437 score[2]+= vlc[1]*lambda; 438 if(score[2] < score[best] && mx==0 && my==0){ 439 best=2; 440 s->dsp.put_pixels_tab[0][0](decoded, ref, stride, 16); 441 for(i=0; i<6; i++){ 442 count[2][i]=0; 443 } 444 put_bits(&s->pb, vlc[1], vlc[0]); 445 } 446 } 447 448 if(best==1){ 449 for(i=0; i<6; i++){ 450 count[1][i]= put_bits_count(&s->reorder_pb[i]); 451 flush_put_bits(&s->reorder_pb[i]); 452 } 453 }else{ 454 motion_ptr[0 ] = motion_ptr[1 ]= 455 motion_ptr[2 ] = motion_ptr[3 ]= 456 motion_ptr[0+2*s->m.b8_stride] = motion_ptr[1+2*s->m.b8_stride]= 457 motion_ptr[2+2*s->m.b8_stride] = motion_ptr[3+2*s->m.b8_stride]=0; 458 } 459 } 460 461 s->rd_total += score[best]; 462 463 for(i=5; i>=0; i--){ 464 ff_copy_bits(&s->pb, reorder_buffer[best][i], count[best][i]); 465 } 466 if(best==0){ 467 s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16); 468 } 469 } 470 s->m.first_slice_line=0; 471 } 472 return 0; 473} 474 475static av_cold int svq1_encode_init(AVCodecContext *avctx) 476{ 477 SVQ1Context * const s = avctx->priv_data; 478 479 dsputil_init(&s->dsp, avctx); 480 avctx->coded_frame= (AVFrame*)&s->picture; 481 482 s->frame_width = avctx->width; 483 s->frame_height = avctx->height; 484 485 s->y_block_width = (s->frame_width + 15) / 16; 486 s->y_block_height = (s->frame_height + 15) / 16; 487 488 s->c_block_width = (s->frame_width / 4 + 15) / 16; 489 s->c_block_height = (s->frame_height / 4 + 15) / 16; 490 491 s->avctx= avctx; 492 s->m.avctx= avctx; 493 s->m.me.temp = 494 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); 495 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); 496 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); 497 s->mb_type = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int16_t)); 498 s->dummy = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int32_t)); 499 h263_encode_init(&s->m); //mv_penalty 500 501 return 0; 502} 503 504static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf, 505 int buf_size, void *data) 506{ 507 SVQ1Context * const s = avctx->priv_data; 508 AVFrame *pict = data; 509 AVFrame * const p= (AVFrame*)&s->picture; 510 AVFrame temp; 511 int i; 512 513 if(avctx->pix_fmt != PIX_FMT_YUV410P){ 514 av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n"); 515 return -1; 516 } 517 518 if(!s->current_picture.data[0]){ 519 avctx->get_buffer(avctx, &s->current_picture); 520 avctx->get_buffer(avctx, &s->last_picture); 521 s->scratchbuf = av_malloc(s->current_picture.linesize[0] * 16 * 2); 522 } 523 524 temp= s->current_picture; 525 s->current_picture= s->last_picture; 526 s->last_picture= temp; 527 528 init_put_bits(&s->pb, buf, buf_size); 529 530 *p = *pict; 531 p->pict_type = avctx->gop_size && avctx->frame_number % avctx->gop_size ? FF_P_TYPE : FF_I_TYPE; 532 p->key_frame = p->pict_type == FF_I_TYPE; 533 534 svq1_write_header(s, p->pict_type); 535 for(i=0; i<3; i++){ 536 if(svq1_encode_plane(s, i, 537 s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i], 538 s->frame_width / (i?4:1), s->frame_height / (i?4:1), 539 s->picture.linesize[i], s->current_picture.linesize[i]) < 0) 540 return -1; 541 } 542 543// align_put_bits(&s->pb); 544 while(put_bits_count(&s->pb) & 31) 545 put_bits(&s->pb, 1, 0); 546 547 flush_put_bits(&s->pb); 548 549 return put_bits_count(&s->pb) / 8; 550} 551 552static av_cold int svq1_encode_end(AVCodecContext *avctx) 553{ 554 SVQ1Context * const s = avctx->priv_data; 555 int i; 556 557 av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", s->rd_total/(double)(avctx->width*avctx->height*avctx->frame_number)); 558 559 av_freep(&s->m.me.scratchpad); 560 av_freep(&s->m.me.map); 561 av_freep(&s->m.me.score_map); 562 av_freep(&s->mb_type); 563 av_freep(&s->dummy); 564 av_freep(&s->scratchbuf); 565 566 for(i=0; i<3; i++){ 567 av_freep(&s->motion_val8[i]); 568 av_freep(&s->motion_val16[i]); 569 } 570 571 return 0; 572} 573 574 575AVCodec svq1_encoder = { 576 "svq1", 577 AVMEDIA_TYPE_VIDEO, 578 CODEC_ID_SVQ1, 579 sizeof(SVQ1Context), 580 svq1_encode_init, 581 svq1_encode_frame, 582 svq1_encode_end, 583 .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV410P, PIX_FMT_NONE}, 584 .long_name= NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 1 / Sorenson Video 1 / SVQ1"), 585}; 586