1/* 2 * RoQ Video Encoder. 3 * 4 * Copyright (C) 2007 Vitor Sessak <vitor1001@gmail.com> 5 * Copyright (C) 2004-2007 Eric Lasota 6 * Based on RoQ specs (C) 2001 Tim Ferguson 7 * 8 * This file is part of FFmpeg. 9 * 10 * FFmpeg is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * FFmpeg is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with FFmpeg; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 */ 24 25/** 26 * @file 27 * id RoQ encoder by Vitor. Based on the Switchblade3 library and the 28 * Switchblade3 FFmpeg glue by Eric Lasota. 29 */ 30 31/* 32 * COSTS: 33 * Level 1: 34 * SKIP - 2 bits 35 * MOTION - 2 + 8 bits 36 * CODEBOOK - 2 + 8 bits 37 * SUBDIVIDE - 2 + combined subcel cost 38 * 39 * Level 2: 40 * SKIP - 2 bits 41 * MOTION - 2 + 8 bits 42 * CODEBOOK - 2 + 8 bits 43 * SUBDIVIDE - 2 + 4*8 bits 44 * 45 * Maximum cost: 138 bits per cel 46 * 47 * Proper evaluation requires LCD fraction comparison, which requires 48 * Squared Error (SE) loss * savings increase 49 * 50 * Maximum savings increase: 136 bits 51 * Maximum SE loss without overflow: 31580641 52 * Components in 8x8 supercel: 192 53 * Maximum SE precision per component: 164482 54 * >65025, so no truncation is needed (phew) 55 */ 56 57#include <string.h> 58 59#include "roqvideo.h" 60#include "bytestream.h" 61#include "elbg.h" 62#include "mathops.h" 63 64#define CHROMA_BIAS 1 65 66/** 67 * Maximum number of generated 4x4 codebooks. Can't be 256 to workaround a 68 * Quake 3 bug. 69 */ 70#define MAX_CBS_4x4 255 71 72#define MAX_CBS_2x2 256 ///< Maximum number of 2x2 codebooks. 73 74/* The cast is useful when multiplying it by INT_MAX */ 75#define ROQ_LAMBDA_SCALE ((uint64_t) FF_LAMBDA_SCALE) 76 77/* Macroblock support functions */ 78static void unpack_roq_cell(roq_cell *cell, uint8_t u[4*3]) 79{ 80 memcpy(u , cell->y, 4); 81 memset(u+4, cell->u, 4); 82 memset(u+8, cell->v, 4); 83} 84 85static void unpack_roq_qcell(uint8_t cb2[], roq_qcell *qcell, uint8_t u[4*4*3]) 86{ 87 int i,cp; 88 static const int offsets[4] = {0, 2, 8, 10}; 89 90 for (cp=0; cp<3; cp++) 91 for (i=0; i<4; i++) { 92 u[4*4*cp + offsets[i] ] = cb2[qcell->idx[i]*2*2*3 + 4*cp ]; 93 u[4*4*cp + offsets[i]+1] = cb2[qcell->idx[i]*2*2*3 + 4*cp+1]; 94 u[4*4*cp + offsets[i]+4] = cb2[qcell->idx[i]*2*2*3 + 4*cp+2]; 95 u[4*4*cp + offsets[i]+5] = cb2[qcell->idx[i]*2*2*3 + 4*cp+3]; 96 } 97} 98 99 100static void enlarge_roq_mb4(uint8_t base[3*16], uint8_t u[3*64]) 101{ 102 int x,y,cp; 103 104 for(cp=0; cp<3; cp++) 105 for(y=0; y<8; y++) 106 for(x=0; x<8; x++) 107 *u++ = base[(y/2)*4 + (x/2) + 16*cp]; 108} 109 110static inline int square(int x) 111{ 112 return x*x; 113} 114 115static inline int eval_sse(uint8_t *a, uint8_t *b, int count) 116{ 117 int diff=0; 118 119 while(count--) 120 diff += square(*b++ - *a++); 121 122 return diff; 123} 124 125// FIXME Could use DSPContext.sse, but it is not so speed critical (used 126// just for motion estimation). 127static int block_sse(uint8_t **buf1, uint8_t **buf2, int x1, int y1, int x2, 128 int y2, int *stride1, int *stride2, int size) 129{ 130 int i, k; 131 int sse=0; 132 133 for (k=0; k<3; k++) { 134 int bias = (k ? CHROMA_BIAS : 4); 135 for (i=0; i<size; i++) 136 sse += bias*eval_sse(buf1[k] + (y1+i)*stride1[k] + x1, 137 buf2[k] + (y2+i)*stride2[k] + x2, size); 138 } 139 140 return sse; 141} 142 143static int eval_motion_dist(RoqContext *enc, int x, int y, motion_vect vect, 144 int size) 145{ 146 int mx=vect.d[0]; 147 int my=vect.d[1]; 148 149 if (mx < -7 || mx > 7) 150 return INT_MAX; 151 152 if (my < -7 || my > 7) 153 return INT_MAX; 154 155 mx += x; 156 my += y; 157 158 if ((unsigned) mx > enc->width-size || (unsigned) my > enc->height-size) 159 return INT_MAX; 160 161 return block_sse(enc->frame_to_enc->data, enc->last_frame->data, x, y, 162 mx, my, 163 enc->frame_to_enc->linesize, enc->last_frame->linesize, 164 size); 165} 166 167/** 168 * Returns distortion between two macroblocks 169 */ 170static inline int squared_diff_macroblock(uint8_t a[], uint8_t b[], int size) 171{ 172 int cp, sdiff=0; 173 174 for(cp=0;cp<3;cp++) { 175 int bias = (cp ? CHROMA_BIAS : 4); 176 sdiff += bias*eval_sse(a, b, size*size); 177 a += size*size; 178 b += size*size; 179 } 180 181 return sdiff; 182} 183 184typedef struct 185{ 186 int eval_dist[4]; 187 int best_bit_use; 188 int best_coding; 189 190 int subCels[4]; 191 motion_vect motion; 192 int cbEntry; 193} SubcelEvaluation; 194 195typedef struct 196{ 197 int eval_dist[4]; 198 int best_coding; 199 200 SubcelEvaluation subCels[4]; 201 202 motion_vect motion; 203 int cbEntry; 204 205 int sourceX, sourceY; 206} CelEvaluation; 207 208typedef struct 209{ 210 int numCB4; 211 int numCB2; 212 int usedCB2[MAX_CBS_2x2]; 213 int usedCB4[MAX_CBS_4x4]; 214 uint8_t unpacked_cb2[MAX_CBS_2x2*2*2*3]; 215 uint8_t unpacked_cb4[MAX_CBS_4x4*4*4*3]; 216 uint8_t unpacked_cb4_enlarged[MAX_CBS_4x4*8*8*3]; 217} RoqCodebooks; 218 219/** 220 * Temporary vars 221 */ 222typedef struct RoqTempData 223{ 224 CelEvaluation *cel_evals; 225 226 int f2i4[MAX_CBS_4x4]; 227 int i2f4[MAX_CBS_4x4]; 228 int f2i2[MAX_CBS_2x2]; 229 int i2f2[MAX_CBS_2x2]; 230 231 int mainChunkSize; 232 233 int numCB4; 234 int numCB2; 235 236 RoqCodebooks codebooks; 237 238 int *closest_cb2; 239 int used_option[4]; 240} RoqTempdata; 241 242/** 243 * Initializes cel evaluators and sets their source coordinates 244 */ 245static void create_cel_evals(RoqContext *enc, RoqTempdata *tempData) 246{ 247 int n=0, x, y, i; 248 249 tempData->cel_evals = av_malloc(enc->width*enc->height/64 * sizeof(CelEvaluation)); 250 251 /* Map to the ROQ quadtree order */ 252 for (y=0; y<enc->height; y+=16) 253 for (x=0; x<enc->width; x+=16) 254 for(i=0; i<4; i++) { 255 tempData->cel_evals[n ].sourceX = x + (i&1)*8; 256 tempData->cel_evals[n++].sourceY = y + (i&2)*4; 257 } 258} 259 260/** 261 * Get macroblocks from parts of the image 262 */ 263static void get_frame_mb(AVFrame *frame, int x, int y, uint8_t mb[], int dim) 264{ 265 int i, j, cp; 266 267 for (cp=0; cp<3; cp++) { 268 int stride = frame->linesize[cp]; 269 for (i=0; i<dim; i++) 270 for (j=0; j<dim; j++) 271 *mb++ = frame->data[cp][(y+i)*stride + x + j]; 272 } 273} 274 275/** 276 * Find the codebook with the lowest distortion from an image 277 */ 278static int index_mb(uint8_t cluster[], uint8_t cb[], int numCB, 279 int *outIndex, int dim) 280{ 281 int i, lDiff = INT_MAX, pick=0; 282 283 /* Diff against the others */ 284 for (i=0; i<numCB; i++) { 285 int diff = squared_diff_macroblock(cluster, cb + i*dim*dim*3, dim); 286 if (diff < lDiff) { 287 lDiff = diff; 288 pick = i; 289 } 290 } 291 292 *outIndex = pick; 293 return lDiff; 294} 295 296#define EVAL_MOTION(MOTION) \ 297 do { \ 298 diff = eval_motion_dist(enc, j, i, MOTION, blocksize); \ 299 \ 300 if (diff < lowestdiff) { \ 301 lowestdiff = diff; \ 302 bestpick = MOTION; \ 303 } \ 304 } while(0) 305 306static void motion_search(RoqContext *enc, int blocksize) 307{ 308 static const motion_vect offsets[8] = { 309 {{ 0,-1}}, 310 {{ 0, 1}}, 311 {{-1, 0}}, 312 {{ 1, 0}}, 313 {{-1, 1}}, 314 {{ 1,-1}}, 315 {{-1,-1}}, 316 {{ 1, 1}}, 317 }; 318 319 int diff, lowestdiff, oldbest; 320 int off[3]; 321 motion_vect bestpick = {{0,0}}; 322 int i, j, k, offset; 323 324 motion_vect *last_motion; 325 motion_vect *this_motion; 326 motion_vect vect, vect2; 327 328 int max=(enc->width/blocksize)*enc->height/blocksize; 329 330 if (blocksize == 4) { 331 last_motion = enc->last_motion4; 332 this_motion = enc->this_motion4; 333 } else { 334 last_motion = enc->last_motion8; 335 this_motion = enc->this_motion8; 336 } 337 338 for (i=0; i<enc->height; i+=blocksize) 339 for (j=0; j<enc->width; j+=blocksize) { 340 lowestdiff = eval_motion_dist(enc, j, i, (motion_vect) {{0,0}}, 341 blocksize); 342 bestpick.d[0] = 0; 343 bestpick.d[1] = 0; 344 345 if (blocksize == 4) 346 EVAL_MOTION(enc->this_motion8[(i/8)*(enc->width/8) + j/8]); 347 348 offset = (i/blocksize)*enc->width/blocksize + j/blocksize; 349 if (offset < max && offset >= 0) 350 EVAL_MOTION(last_motion[offset]); 351 352 offset++; 353 if (offset < max && offset >= 0) 354 EVAL_MOTION(last_motion[offset]); 355 356 offset = (i/blocksize + 1)*enc->width/blocksize + j/blocksize; 357 if (offset < max && offset >= 0) 358 EVAL_MOTION(last_motion[offset]); 359 360 off[0]= (i/blocksize)*enc->width/blocksize + j/blocksize - 1; 361 off[1]= off[0] - enc->width/blocksize + 1; 362 off[2]= off[1] + 1; 363 364 if (i) { 365 366 for(k=0; k<2; k++) 367 vect.d[k]= mid_pred(this_motion[off[0]].d[k], 368 this_motion[off[1]].d[k], 369 this_motion[off[2]].d[k]); 370 371 EVAL_MOTION(vect); 372 for(k=0; k<3; k++) 373 EVAL_MOTION(this_motion[off[k]]); 374 } else if(j) 375 EVAL_MOTION(this_motion[off[0]]); 376 377 vect = bestpick; 378 379 oldbest = -1; 380 while (oldbest != lowestdiff) { 381 oldbest = lowestdiff; 382 for (k=0; k<8; k++) { 383 vect2 = vect; 384 vect2.d[0] += offsets[k].d[0]; 385 vect2.d[1] += offsets[k].d[1]; 386 EVAL_MOTION(vect2); 387 } 388 vect = bestpick; 389 } 390 offset = (i/blocksize)*enc->width/blocksize + j/blocksize; 391 this_motion[offset] = bestpick; 392 } 393} 394 395/** 396 * Gets distortion for all options available to a subcel 397 */ 398static void gather_data_for_subcel(SubcelEvaluation *subcel, int x, 399 int y, RoqContext *enc, RoqTempdata *tempData) 400{ 401 uint8_t mb4[4*4*3]; 402 uint8_t mb2[2*2*3]; 403 int cluster_index; 404 int i, best_dist; 405 406 static const int bitsUsed[4] = {2, 10, 10, 34}; 407 408 if (enc->framesSinceKeyframe >= 1) { 409 subcel->motion = enc->this_motion4[y*enc->width/16 + x/4]; 410 411 subcel->eval_dist[RoQ_ID_FCC] = 412 eval_motion_dist(enc, x, y, 413 enc->this_motion4[y*enc->width/16 + x/4], 4); 414 } else 415 subcel->eval_dist[RoQ_ID_FCC] = INT_MAX; 416 417 if (enc->framesSinceKeyframe >= 2) 418 subcel->eval_dist[RoQ_ID_MOT] = block_sse(enc->frame_to_enc->data, 419 enc->current_frame->data, x, 420 y, x, y, 421 enc->frame_to_enc->linesize, 422 enc->current_frame->linesize, 423 4); 424 else 425 subcel->eval_dist[RoQ_ID_MOT] = INT_MAX; 426 427 cluster_index = y*enc->width/16 + x/4; 428 429 get_frame_mb(enc->frame_to_enc, x, y, mb4, 4); 430 431 subcel->eval_dist[RoQ_ID_SLD] = index_mb(mb4, 432 tempData->codebooks.unpacked_cb4, 433 tempData->codebooks.numCB4, 434 &subcel->cbEntry, 4); 435 436 subcel->eval_dist[RoQ_ID_CCC] = 0; 437 438 for(i=0;i<4;i++) { 439 subcel->subCels[i] = tempData->closest_cb2[cluster_index*4+i]; 440 441 get_frame_mb(enc->frame_to_enc, x+2*(i&1), 442 y+(i&2), mb2, 2); 443 444 subcel->eval_dist[RoQ_ID_CCC] += 445 squared_diff_macroblock(tempData->codebooks.unpacked_cb2 + subcel->subCels[i]*2*2*3, mb2, 2); 446 } 447 448 best_dist = INT_MAX; 449 for (i=0; i<4; i++) 450 if (ROQ_LAMBDA_SCALE*subcel->eval_dist[i] + enc->lambda*bitsUsed[i] < 451 best_dist) { 452 subcel->best_coding = i; 453 subcel->best_bit_use = bitsUsed[i]; 454 best_dist = ROQ_LAMBDA_SCALE*subcel->eval_dist[i] + 455 enc->lambda*bitsUsed[i]; 456 } 457} 458 459/** 460 * Gets distortion for all options available to a cel 461 */ 462static void gather_data_for_cel(CelEvaluation *cel, RoqContext *enc, 463 RoqTempdata *tempData) 464{ 465 uint8_t mb8[8*8*3]; 466 int index = cel->sourceY*enc->width/64 + cel->sourceX/8; 467 int i, j, best_dist, divide_bit_use; 468 469 int bitsUsed[4] = {2, 10, 10, 0}; 470 471 if (enc->framesSinceKeyframe >= 1) { 472 cel->motion = enc->this_motion8[index]; 473 474 cel->eval_dist[RoQ_ID_FCC] = 475 eval_motion_dist(enc, cel->sourceX, cel->sourceY, 476 enc->this_motion8[index], 8); 477 } else 478 cel->eval_dist[RoQ_ID_FCC] = INT_MAX; 479 480 if (enc->framesSinceKeyframe >= 2) 481 cel->eval_dist[RoQ_ID_MOT] = block_sse(enc->frame_to_enc->data, 482 enc->current_frame->data, 483 cel->sourceX, cel->sourceY, 484 cel->sourceX, cel->sourceY, 485 enc->frame_to_enc->linesize, 486 enc->current_frame->linesize,8); 487 else 488 cel->eval_dist[RoQ_ID_MOT] = INT_MAX; 489 490 get_frame_mb(enc->frame_to_enc, cel->sourceX, cel->sourceY, mb8, 8); 491 492 cel->eval_dist[RoQ_ID_SLD] = 493 index_mb(mb8, tempData->codebooks.unpacked_cb4_enlarged, 494 tempData->codebooks.numCB4, &cel->cbEntry, 8); 495 496 gather_data_for_subcel(cel->subCels + 0, cel->sourceX+0, cel->sourceY+0, enc, tempData); 497 gather_data_for_subcel(cel->subCels + 1, cel->sourceX+4, cel->sourceY+0, enc, tempData); 498 gather_data_for_subcel(cel->subCels + 2, cel->sourceX+0, cel->sourceY+4, enc, tempData); 499 gather_data_for_subcel(cel->subCels + 3, cel->sourceX+4, cel->sourceY+4, enc, tempData); 500 501 cel->eval_dist[RoQ_ID_CCC] = 0; 502 divide_bit_use = 0; 503 for (i=0; i<4; i++) { 504 cel->eval_dist[RoQ_ID_CCC] += 505 cel->subCels[i].eval_dist[cel->subCels[i].best_coding]; 506 divide_bit_use += cel->subCels[i].best_bit_use; 507 } 508 509 best_dist = INT_MAX; 510 bitsUsed[3] = 2 + divide_bit_use; 511 512 for (i=0; i<4; i++) 513 if (ROQ_LAMBDA_SCALE*cel->eval_dist[i] + enc->lambda*bitsUsed[i] < 514 best_dist) { 515 cel->best_coding = i; 516 best_dist = ROQ_LAMBDA_SCALE*cel->eval_dist[i] + 517 enc->lambda*bitsUsed[i]; 518 } 519 520 tempData->used_option[cel->best_coding]++; 521 tempData->mainChunkSize += bitsUsed[cel->best_coding]; 522 523 if (cel->best_coding == RoQ_ID_SLD) 524 tempData->codebooks.usedCB4[cel->cbEntry]++; 525 526 if (cel->best_coding == RoQ_ID_CCC) 527 for (i=0; i<4; i++) { 528 if (cel->subCels[i].best_coding == RoQ_ID_SLD) 529 tempData->codebooks.usedCB4[cel->subCels[i].cbEntry]++; 530 else if (cel->subCels[i].best_coding == RoQ_ID_CCC) 531 for (j=0; j<4; j++) 532 tempData->codebooks.usedCB2[cel->subCels[i].subCels[j]]++; 533 } 534} 535 536static void remap_codebooks(RoqContext *enc, RoqTempdata *tempData) 537{ 538 int i, j, idx=0; 539 540 /* Make remaps for the final codebook usage */ 541 for (i=0; i<MAX_CBS_4x4; i++) { 542 if (tempData->codebooks.usedCB4[i]) { 543 tempData->i2f4[i] = idx; 544 tempData->f2i4[idx] = i; 545 for (j=0; j<4; j++) 546 tempData->codebooks.usedCB2[enc->cb4x4[i].idx[j]]++; 547 idx++; 548 } 549 } 550 551 tempData->numCB4 = idx; 552 553 idx = 0; 554 for (i=0; i<MAX_CBS_2x2; i++) { 555 if (tempData->codebooks.usedCB2[i]) { 556 tempData->i2f2[i] = idx; 557 tempData->f2i2[idx] = i; 558 idx++; 559 } 560 } 561 tempData->numCB2 = idx; 562 563} 564 565/** 566 * Write codebook chunk 567 */ 568static void write_codebooks(RoqContext *enc, RoqTempdata *tempData) 569{ 570 int i, j; 571 uint8_t **outp= &enc->out_buf; 572 573 if (tempData->numCB2) { 574 bytestream_put_le16(outp, RoQ_QUAD_CODEBOOK); 575 bytestream_put_le32(outp, tempData->numCB2*6 + tempData->numCB4*4); 576 bytestream_put_byte(outp, tempData->numCB4); 577 bytestream_put_byte(outp, tempData->numCB2); 578 579 for (i=0; i<tempData->numCB2; i++) { 580 bytestream_put_buffer(outp, enc->cb2x2[tempData->f2i2[i]].y, 4); 581 bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].u); 582 bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].v); 583 } 584 585 for (i=0; i<tempData->numCB4; i++) 586 for (j=0; j<4; j++) 587 bytestream_put_byte(outp, tempData->i2f2[enc->cb4x4[tempData->f2i4[i]].idx[j]]); 588 589 } 590} 591 592static inline uint8_t motion_arg(motion_vect mot) 593{ 594 uint8_t ax = 8 - ((uint8_t) mot.d[0]); 595 uint8_t ay = 8 - ((uint8_t) mot.d[1]); 596 return ((ax&15)<<4) | (ay&15); 597} 598 599typedef struct 600{ 601 int typeSpool; 602 int typeSpoolLength; 603 uint8_t argumentSpool[64]; 604 uint8_t *args; 605 uint8_t **pout; 606} CodingSpool; 607 608/* NOTE: Typecodes must be spooled AFTER arguments!! */ 609static void write_typecode(CodingSpool *s, uint8_t type) 610{ 611 s->typeSpool |= (type & 3) << (14 - s->typeSpoolLength); 612 s->typeSpoolLength += 2; 613 if (s->typeSpoolLength == 16) { 614 bytestream_put_le16(s->pout, s->typeSpool); 615 bytestream_put_buffer(s->pout, s->argumentSpool, 616 s->args - s->argumentSpool); 617 s->typeSpoolLength = 0; 618 s->typeSpool = 0; 619 s->args = s->argumentSpool; 620 } 621} 622 623static void reconstruct_and_encode_image(RoqContext *enc, RoqTempdata *tempData, int w, int h, int numBlocks) 624{ 625 int i, j, k; 626 int x, y; 627 int subX, subY; 628 int dist=0; 629 630 roq_qcell *qcell; 631 CelEvaluation *eval; 632 633 CodingSpool spool; 634 635 spool.typeSpool=0; 636 spool.typeSpoolLength=0; 637 spool.args = spool.argumentSpool; 638 spool.pout = &enc->out_buf; 639 640 if (tempData->used_option[RoQ_ID_CCC]%2) 641 tempData->mainChunkSize+=8; //FIXME 642 643 /* Write the video chunk header */ 644 bytestream_put_le16(&enc->out_buf, RoQ_QUAD_VQ); 645 bytestream_put_le32(&enc->out_buf, tempData->mainChunkSize/8); 646 bytestream_put_byte(&enc->out_buf, 0x0); 647 bytestream_put_byte(&enc->out_buf, 0x0); 648 649 for (i=0; i<numBlocks; i++) { 650 eval = tempData->cel_evals + i; 651 652 x = eval->sourceX; 653 y = eval->sourceY; 654 dist += eval->eval_dist[eval->best_coding]; 655 656 switch (eval->best_coding) { 657 case RoQ_ID_MOT: 658 write_typecode(&spool, RoQ_ID_MOT); 659 break; 660 661 case RoQ_ID_FCC: 662 bytestream_put_byte(&spool.args, motion_arg(eval->motion)); 663 664 write_typecode(&spool, RoQ_ID_FCC); 665 ff_apply_motion_8x8(enc, x, y, 666 eval->motion.d[0], eval->motion.d[1]); 667 break; 668 669 case RoQ_ID_SLD: 670 bytestream_put_byte(&spool.args, tempData->i2f4[eval->cbEntry]); 671 write_typecode(&spool, RoQ_ID_SLD); 672 673 qcell = enc->cb4x4 + eval->cbEntry; 674 ff_apply_vector_4x4(enc, x , y , enc->cb2x2 + qcell->idx[0]); 675 ff_apply_vector_4x4(enc, x+4, y , enc->cb2x2 + qcell->idx[1]); 676 ff_apply_vector_4x4(enc, x , y+4, enc->cb2x2 + qcell->idx[2]); 677 ff_apply_vector_4x4(enc, x+4, y+4, enc->cb2x2 + qcell->idx[3]); 678 break; 679 680 case RoQ_ID_CCC: 681 write_typecode(&spool, RoQ_ID_CCC); 682 683 for (j=0; j<4; j++) { 684 subX = x + 4*(j&1); 685 subY = y + 2*(j&2); 686 687 switch(eval->subCels[j].best_coding) { 688 case RoQ_ID_MOT: 689 break; 690 691 case RoQ_ID_FCC: 692 bytestream_put_byte(&spool.args, 693 motion_arg(eval->subCels[j].motion)); 694 695 ff_apply_motion_4x4(enc, subX, subY, 696 eval->subCels[j].motion.d[0], 697 eval->subCels[j].motion.d[1]); 698 break; 699 700 case RoQ_ID_SLD: 701 bytestream_put_byte(&spool.args, 702 tempData->i2f4[eval->subCels[j].cbEntry]); 703 704 qcell = enc->cb4x4 + eval->subCels[j].cbEntry; 705 706 ff_apply_vector_2x2(enc, subX , subY , 707 enc->cb2x2 + qcell->idx[0]); 708 ff_apply_vector_2x2(enc, subX+2, subY , 709 enc->cb2x2 + qcell->idx[1]); 710 ff_apply_vector_2x2(enc, subX , subY+2, 711 enc->cb2x2 + qcell->idx[2]); 712 ff_apply_vector_2x2(enc, subX+2, subY+2, 713 enc->cb2x2 + qcell->idx[3]); 714 break; 715 716 case RoQ_ID_CCC: 717 for (k=0; k<4; k++) { 718 int cb_idx = eval->subCels[j].subCels[k]; 719 bytestream_put_byte(&spool.args, 720 tempData->i2f2[cb_idx]); 721 722 ff_apply_vector_2x2(enc, subX + 2*(k&1), subY + (k&2), 723 enc->cb2x2 + cb_idx); 724 } 725 break; 726 } 727 write_typecode(&spool, eval->subCels[j].best_coding); 728 } 729 break; 730 } 731 } 732 733 /* Flush the remainder of the argument/type spool */ 734 while (spool.typeSpoolLength) 735 write_typecode(&spool, 0x0); 736 737#if 0 738 uint8_t *fdata[3] = {enc->frame_to_enc->data[0], 739 enc->frame_to_enc->data[1], 740 enc->frame_to_enc->data[2]}; 741 uint8_t *cdata[3] = {enc->current_frame->data[0], 742 enc->current_frame->data[1], 743 enc->current_frame->data[2]}; 744 av_log(enc->avctx, AV_LOG_ERROR, "Expected distortion: %i Actual: %i\n", 745 dist, 746 block_sse(fdata, cdata, 0, 0, 0, 0, 747 enc->frame_to_enc->linesize, 748 enc->current_frame->linesize, 749 enc->width)); //WARNING: Square dimensions implied... 750#endif 751} 752 753 754/** 755 * Create a single YUV cell from a 2x2 section of the image 756 */ 757static inline void frame_block_to_cell(uint8_t *block, uint8_t **data, 758 int top, int left, int *stride) 759{ 760 int i, j, u=0, v=0; 761 762 for (i=0; i<2; i++) 763 for (j=0; j<2; j++) { 764 int x = (top+i)*stride[0] + left + j; 765 *block++ = data[0][x]; 766 x = (top+i)*stride[1] + left + j; 767 u += data[1][x]; 768 v += data[2][x]; 769 } 770 771 *block++ = (u+2)/4; 772 *block++ = (v+2)/4; 773} 774 775/** 776 * Creates YUV clusters for the entire image 777 */ 778static void create_clusters(AVFrame *frame, int w, int h, uint8_t *yuvClusters) 779{ 780 int i, j, k, l; 781 782 for (i=0; i<h; i+=4) 783 for (j=0; j<w; j+=4) { 784 for (k=0; k < 2; k++) 785 for (l=0; l < 2; l++) 786 frame_block_to_cell(yuvClusters + (l + 2*k)*6, frame->data, 787 i+2*k, j+2*l, frame->linesize); 788 yuvClusters += 24; 789 } 790} 791 792static void generate_codebook(RoqContext *enc, RoqTempdata *tempdata, 793 int *points, int inputCount, roq_cell *results, 794 int size, int cbsize) 795{ 796 int i, j, k; 797 int c_size = size*size/4; 798 int *buf; 799 int *codebook = av_malloc(6*c_size*cbsize*sizeof(int)); 800 int *closest_cb; 801 802 if (size == 4) 803 closest_cb = av_malloc(6*c_size*inputCount*sizeof(int)); 804 else 805 closest_cb = tempdata->closest_cb2; 806 807 ff_init_elbg(points, 6*c_size, inputCount, codebook, cbsize, 1, closest_cb, &enc->randctx); 808 ff_do_elbg(points, 6*c_size, inputCount, codebook, cbsize, 1, closest_cb, &enc->randctx); 809 810 if (size == 4) 811 av_free(closest_cb); 812 813 buf = codebook; 814 for (i=0; i<cbsize; i++) 815 for (k=0; k<c_size; k++) { 816 for(j=0; j<4; j++) 817 results->y[j] = *buf++; 818 819 results->u = (*buf++ + CHROMA_BIAS/2)/CHROMA_BIAS; 820 results->v = (*buf++ + CHROMA_BIAS/2)/CHROMA_BIAS; 821 results++; 822 } 823 824 av_free(codebook); 825} 826 827static void generate_new_codebooks(RoqContext *enc, RoqTempdata *tempData) 828{ 829 int i,j; 830 RoqCodebooks *codebooks = &tempData->codebooks; 831 int max = enc->width*enc->height/16; 832 uint8_t mb2[3*4]; 833 roq_cell *results4 = av_malloc(sizeof(roq_cell)*MAX_CBS_4x4*4); 834 uint8_t *yuvClusters=av_malloc(sizeof(int)*max*6*4); 835 int *points = av_malloc(max*6*4*sizeof(int)); 836 int bias; 837 838 /* Subsample YUV data */ 839 create_clusters(enc->frame_to_enc, enc->width, enc->height, yuvClusters); 840 841 /* Cast to integer and apply chroma bias */ 842 for (i=0; i<max*24; i++) { 843 bias = ((i%6)<4) ? 1 : CHROMA_BIAS; 844 points[i] = bias*yuvClusters[i]; 845 } 846 847 /* Create 4x4 codebooks */ 848 generate_codebook(enc, tempData, points, max, results4, 4, MAX_CBS_4x4); 849 850 codebooks->numCB4 = MAX_CBS_4x4; 851 852 tempData->closest_cb2 = av_malloc(max*4*sizeof(int)); 853 854 /* Create 2x2 codebooks */ 855 generate_codebook(enc, tempData, points, max*4, enc->cb2x2, 2, MAX_CBS_2x2); 856 857 codebooks->numCB2 = MAX_CBS_2x2; 858 859 /* Unpack 2x2 codebook clusters */ 860 for (i=0; i<codebooks->numCB2; i++) 861 unpack_roq_cell(enc->cb2x2 + i, codebooks->unpacked_cb2 + i*2*2*3); 862 863 /* Index all 4x4 entries to the 2x2 entries, unpack, and enlarge */ 864 for (i=0; i<codebooks->numCB4; i++) { 865 for (j=0; j<4; j++) { 866 unpack_roq_cell(&results4[4*i + j], mb2); 867 index_mb(mb2, codebooks->unpacked_cb2, codebooks->numCB2, 868 &enc->cb4x4[i].idx[j], 2); 869 } 870 unpack_roq_qcell(codebooks->unpacked_cb2, enc->cb4x4 + i, 871 codebooks->unpacked_cb4 + i*4*4*3); 872 enlarge_roq_mb4(codebooks->unpacked_cb4 + i*4*4*3, 873 codebooks->unpacked_cb4_enlarged + i*8*8*3); 874 } 875 876 av_free(yuvClusters); 877 av_free(points); 878 av_free(results4); 879} 880 881static void roq_encode_video(RoqContext *enc) 882{ 883 RoqTempdata *tempData = enc->tmpData; 884 int i; 885 886 memset(tempData, 0, sizeof(*tempData)); 887 888 create_cel_evals(enc, tempData); 889 890 generate_new_codebooks(enc, tempData); 891 892 if (enc->framesSinceKeyframe >= 1) { 893 motion_search(enc, 8); 894 motion_search(enc, 4); 895 } 896 897 retry_encode: 898 for (i=0; i<enc->width*enc->height/64; i++) 899 gather_data_for_cel(tempData->cel_evals + i, enc, tempData); 900 901 /* Quake 3 can't handle chunks bigger than 65536 bytes */ 902 if (tempData->mainChunkSize/8 > 65536) { 903 enc->lambda *= .8; 904 goto retry_encode; 905 } 906 907 remap_codebooks(enc, tempData); 908 909 write_codebooks(enc, tempData); 910 911 reconstruct_and_encode_image(enc, tempData, enc->width, enc->height, 912 enc->width*enc->height/64); 913 914 enc->avctx->coded_frame = enc->current_frame; 915 916 /* Rotate frame history */ 917 FFSWAP(AVFrame *, enc->current_frame, enc->last_frame); 918 FFSWAP(motion_vect *, enc->last_motion4, enc->this_motion4); 919 FFSWAP(motion_vect *, enc->last_motion8, enc->this_motion8); 920 921 av_free(tempData->cel_evals); 922 av_free(tempData->closest_cb2); 923 924 enc->framesSinceKeyframe++; 925} 926 927static int roq_encode_init(AVCodecContext *avctx) 928{ 929 RoqContext *enc = avctx->priv_data; 930 931 av_lfg_init(&enc->randctx, 1); 932 933 enc->framesSinceKeyframe = 0; 934 if ((avctx->width & 0xf) || (avctx->height & 0xf)) { 935 av_log(avctx, AV_LOG_ERROR, "Dimensions must be divisible by 16\n"); 936 return -1; 937 } 938 939 if (((avctx->width)&(avctx->width-1))||((avctx->height)&(avctx->height-1))) 940 av_log(avctx, AV_LOG_ERROR, "Warning: dimensions not power of two\n"); 941 942 enc->width = avctx->width; 943 enc->height = avctx->height; 944 945 enc->framesSinceKeyframe = 0; 946 enc->first_frame = 1; 947 948 enc->last_frame = &enc->frames[0]; 949 enc->current_frame = &enc->frames[1]; 950 951 enc->tmpData = av_malloc(sizeof(RoqTempdata)); 952 953 enc->this_motion4 = 954 av_mallocz((enc->width*enc->height/16)*sizeof(motion_vect)); 955 956 enc->last_motion4 = 957 av_malloc ((enc->width*enc->height/16)*sizeof(motion_vect)); 958 959 enc->this_motion8 = 960 av_mallocz((enc->width*enc->height/64)*sizeof(motion_vect)); 961 962 enc->last_motion8 = 963 av_malloc ((enc->width*enc->height/64)*sizeof(motion_vect)); 964 965 return 0; 966} 967 968static void roq_write_video_info_chunk(RoqContext *enc) 969{ 970 /* ROQ info chunk */ 971 bytestream_put_le16(&enc->out_buf, RoQ_INFO); 972 973 /* Size: 8 bytes */ 974 bytestream_put_le32(&enc->out_buf, 8); 975 976 /* Unused argument */ 977 bytestream_put_byte(&enc->out_buf, 0x00); 978 bytestream_put_byte(&enc->out_buf, 0x00); 979 980 /* Width */ 981 bytestream_put_le16(&enc->out_buf, enc->width); 982 983 /* Height */ 984 bytestream_put_le16(&enc->out_buf, enc->height); 985 986 /* Unused in Quake 3, mimics the output of the real encoder */ 987 bytestream_put_byte(&enc->out_buf, 0x08); 988 bytestream_put_byte(&enc->out_buf, 0x00); 989 bytestream_put_byte(&enc->out_buf, 0x04); 990 bytestream_put_byte(&enc->out_buf, 0x00); 991} 992 993static int roq_encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data) 994{ 995 RoqContext *enc = avctx->priv_data; 996 AVFrame *frame= data; 997 uint8_t *buf_start = buf; 998 999 enc->out_buf = buf; 1000 enc->avctx = avctx; 1001 1002 enc->frame_to_enc = frame; 1003 1004 if (frame->quality) 1005 enc->lambda = frame->quality - 1; 1006 else 1007 enc->lambda = 2*ROQ_LAMBDA_SCALE; 1008 1009 /* 138 bits max per 8x8 block + 1010 * 256 codebooks*(6 bytes 2x2 + 4 bytes 4x4) + 8 bytes frame header */ 1011 if (((enc->width*enc->height/64)*138+7)/8 + 256*(6+4) + 8 > buf_size) { 1012 av_log(avctx, AV_LOG_ERROR, " RoQ: Output buffer too small!\n"); 1013 return -1; 1014 } 1015 1016 /* Check for I frame */ 1017 if (enc->framesSinceKeyframe == avctx->gop_size) 1018 enc->framesSinceKeyframe = 0; 1019 1020 if (enc->first_frame) { 1021 /* Alloc memory for the reconstruction data (we must know the stride 1022 for that) */ 1023 if (avctx->get_buffer(avctx, enc->current_frame) || 1024 avctx->get_buffer(avctx, enc->last_frame)) { 1025 av_log(avctx, AV_LOG_ERROR, " RoQ: get_buffer() failed\n"); 1026 return -1; 1027 } 1028 1029 /* Before the first video frame, write a "video info" chunk */ 1030 roq_write_video_info_chunk(enc); 1031 1032 enc->first_frame = 0; 1033 } 1034 1035 /* Encode the actual frame */ 1036 roq_encode_video(enc); 1037 1038 return enc->out_buf - buf_start; 1039} 1040 1041static int roq_encode_end(AVCodecContext *avctx) 1042{ 1043 RoqContext *enc = avctx->priv_data; 1044 1045 avctx->release_buffer(avctx, enc->last_frame); 1046 avctx->release_buffer(avctx, enc->current_frame); 1047 1048 av_free(enc->tmpData); 1049 av_free(enc->this_motion4); 1050 av_free(enc->last_motion4); 1051 av_free(enc->this_motion8); 1052 av_free(enc->last_motion8); 1053 1054 return 0; 1055} 1056 1057AVCodec roq_encoder = 1058{ 1059 "roqvideo", 1060 AVMEDIA_TYPE_VIDEO, 1061 CODEC_ID_ROQ, 1062 sizeof(RoqContext), 1063 roq_encode_init, 1064 roq_encode_frame, 1065 roq_encode_end, 1066 .supported_framerates = (const AVRational[]){{30,1}, {0,0}}, 1067 .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV444P, PIX_FMT_NONE}, 1068 .long_name = NULL_IF_CONFIG_SMALL("id RoQ video"), 1069}; 1070