/*
 * Copyright (c) 2005 Robert Edele <yartrebo@earthlink.net>
 * Copyright (c) 2012 Stefano Sabatini
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Advanced blur-based logo removing filter
 *
 * This filter loads an image mask file showing where a logo is and
 * uses a blur transform to remove the logo.
 *
 * Based on the libmpcodecs remove-logo filter by Robert Edele.
 */

/**
 * This code implements a filter to remove annoying TV logos and other annoying
 * images placed onto a video stream. It works by filling in the pixels that
 * comprise the logo with neighboring pixels. The transform is very loosely
 * based on a gaussian blur, but it is different enough to merit its own
 * paragraph later on. It is a major improvement on the old delogo filter as it
 * both uses a better blurring algorithm and uses a bitmap to use an arbitrary
 * and generally much tighter fitting shape than a rectangle.
 *
 * The logo removal algorithm has two key points. The first is that it
 * distinguishes between pixels in the logo and those not in the logo by using
 * the passed-in bitmap.
 * Pixels not in the logo are copied over directly without
 * being modified and they also serve as source pixels for the logo
 * fill-in. Pixels inside the logo have the mask applied.
 *
 * At init-time the bitmap is reprocessed internally, and the distance to the
 * nearest edge of the logo (Manhattan distance), along with a little extra to
 * remove rough edges, is stored in each pixel. This is done using an in-place
 * erosion algorithm, and incrementing each pixel that survives any given
 * erosion.  Once every pixel is eroded, the maximum value is recorded, and a
 * set of masks from size 0 to this size are generated. The masks are circular
 * binary masks, where each pixel within a radius N (where N is the size of the
 * mask) is a 1, and all other pixels are a 0. Although a gaussian mask would be
 * more mathematically accurate, a binary mask works better in practice because
 * we generally do not use the central pixels in the mask (because they are in
 * the logo region), and thus a gaussian mask will cause too little blur and
 * thus a very unstable image.
 *
 * The mask is applied in a special way. Namely, only pixels in the mask that
 * line up to pixels outside the logo are used. The dynamic mask size means that
 * the mask is just big enough so that the edges touch pixels outside the logo,
 * so the blurring is kept to a minimum and at least the first boundary
 * condition is met (that the image function itself is continuous), even if the
 * second boundary condition (that the derivative of the image function is
 * continuous) is not met. A masking algorithm that does preserve the second
 * boundary condition (perhaps something based on a highly-modified bi-cubic
 * algorithm) should offer even better results on paper, but the noise in a
 * typical TV signal should make anything based on derivatives hopelessly noisy.
70 */ 71 72#include "libavutil/imgutils.h" 73#include "libavutil/opt.h" 74#include "avfilter.h" 75#include "formats.h" 76#include "internal.h" 77#include "video.h" 78#include "bbox.h" 79#include "lavfutils.h" 80#include "lswsutils.h" 81 82typedef struct { 83 const AVClass *class; 84 char *filename; 85 /* Stores our collection of masks. The first is for an array of 86 the second for the y axis, and the third for the x axis. */ 87 int ***mask; 88 int max_mask_size; 89 int mask_w, mask_h; 90 91 uint8_t *full_mask_data; 92 FFBoundingBox full_mask_bbox; 93 uint8_t *half_mask_data; 94 FFBoundingBox half_mask_bbox; 95} RemovelogoContext; 96 97#define OFFSET(x) offsetof(RemovelogoContext, x) 98#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM 99static const AVOption removelogo_options[] = { 100 { "filename", "set bitmap filename", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS }, 101 { "f", "set bitmap filename", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS }, 102 { NULL } 103}; 104 105AVFILTER_DEFINE_CLASS(removelogo); 106 107/** 108 * Choose a slightly larger mask size to improve performance. 109 * 110 * This function maps the absolute minimum mask size needed to the 111 * mask size we'll actually use. f(x) = x (the smallest that will 112 * work) will produce the sharpest results, but will be quite 113 * jittery. f(x) = 1.25x (what I'm using) is a good tradeoff in my 114 * opinion. This will calculate only at init-time, so you can put a 115 * long expression here without effecting performance. 116 */ 117#define apply_mask_fudge_factor(x) (((x) >> 2) + x) 118 119/** 120 * Pre-process an image to give distance information. 121 * 122 * This function takes a bitmap image and converts it in place into a 123 * distance image. A distance image is zero for pixels outside of the 124 * logo and is the Manhattan distance (|dx| + |dy|) from the logo edge 125 * for pixels inside of the logo. 
This will overestimate the distance, 126 * but that is safe, and is far easier to implement than a proper 127 * pythagorean distance since I'm using a modified erosion algorithm 128 * to compute the distances. 129 * 130 * @param mask image which will be converted from a greyscale image 131 * into a distance image. 132 */ 133static void convert_mask_to_strength_mask(uint8_t *data, int linesize, 134 int w, int h, int min_val, 135 int *max_mask_size) 136{ 137 int x, y; 138 139 /* How many times we've gone through the loop. Used in the 140 in-place erosion algorithm and to get us max_mask_size later on. */ 141 int current_pass = 0; 142 143 /* set all non-zero values to 1 */ 144 for (y = 0; y < h; y++) 145 for (x = 0; x < w; x++) 146 data[y*linesize + x] = data[y*linesize + x] > min_val; 147 148 /* For each pass, if a pixel is itself the same value as the 149 current pass, and its four neighbors are too, then it is 150 incremented. If no pixels are incremented by the end of the 151 pass, then we go again. Edge pixels are counted as always 152 excluded (this should be true anyway for any sane mask, but if 153 it isn't this will ensure that we eventually exit). */ 154 while (1) { 155 /* If this doesn't get set by the end of this pass, then we're done. */ 156 int has_anything_changed = 0; 157 uint8_t *current_pixel0 = data + 1 + linesize, *current_pixel; 158 current_pass++; 159 160 for (y = 1; y < h-1; y++) { 161 current_pixel = current_pixel0; 162 for (x = 1; x < w-1; x++) { 163 /* Apply the in-place erosion transform. It is based 164 on the following two premises: 165 1 - Any pixel that fails 1 erosion will fail all 166 future erosions. 167 168 2 - Only pixels having survived all erosions up to 169 the present will be >= to current_pass. 170 It doesn't matter if it survived the current pass, 171 failed it, or hasn't been tested yet. By using >= 172 instead of ==, we allow the algorithm to work in 173 place. 
*/ 174 if ( *current_pixel >= current_pass && 175 *(current_pixel + 1) >= current_pass && 176 *(current_pixel - 1) >= current_pass && 177 *(current_pixel + linesize) >= current_pass && 178 *(current_pixel - linesize) >= current_pass) { 179 /* Increment the value since it still has not been 180 * eroded, as evidenced by the if statement that 181 * just evaluated to true. */ 182 (*current_pixel)++; 183 has_anything_changed = 1; 184 } 185 current_pixel++; 186 } 187 current_pixel0 += linesize; 188 } 189 if (!has_anything_changed) 190 break; 191 } 192 193 /* Apply the fudge factor, which will increase the size of the 194 * mask a little to reduce jitter at the cost of more blur. */ 195 for (y = 1; y < h - 1; y++) 196 for (x = 1; x < w - 1; x++) 197 data[(y * linesize) + x] = apply_mask_fudge_factor(data[(y * linesize) + x]); 198 199 /* As a side-effect, we now know the maximum mask size, which 200 * we'll use to generate our masks. */ 201 /* Apply the fudge factor to this number too, since we must ensure 202 * that enough masks are generated. 
*/ 203 *max_mask_size = apply_mask_fudge_factor(current_pass + 1); 204} 205 206static int query_formats(AVFilterContext *ctx) 207{ 208 static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE }; 209 ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); 210 return 0; 211} 212 213static int load_mask(uint8_t **mask, int *w, int *h, 214 const char *filename, void *log_ctx) 215{ 216 int ret; 217 enum AVPixelFormat pix_fmt; 218 uint8_t *src_data[4], *gray_data[4]; 219 int src_linesize[4], gray_linesize[4]; 220 221 /* load image from file */ 222 if ((ret = ff_load_image(src_data, src_linesize, w, h, &pix_fmt, filename, log_ctx)) < 0) 223 return ret; 224 225 /* convert the image to GRAY8 */ 226 if ((ret = ff_scale_image(gray_data, gray_linesize, *w, *h, AV_PIX_FMT_GRAY8, 227 src_data, src_linesize, *w, *h, pix_fmt, 228 log_ctx)) < 0) 229 goto end; 230 231 /* copy mask to a newly allocated array */ 232 *mask = av_malloc(*w * *h); 233 if (!*mask) 234 ret = AVERROR(ENOMEM); 235 av_image_copy_plane(*mask, *w, gray_data[0], gray_linesize[0], *w, *h); 236 237end: 238 av_freep(&src_data[0]); 239 av_freep(&gray_data[0]); 240 return ret; 241} 242 243/** 244 * Generate a scaled down image with half width, height, and intensity. 245 * 246 * This function not only scales down an image, but halves the value 247 * in each pixel too. The purpose of this is to produce a chroma 248 * filter image out of a luma filter image. The pixel values store the 249 * distance to the edge of the logo and halving the dimensions halves 250 * the distance. This function rounds up, because a downwards rounding 251 * error could cause the filter to fail, but an upwards rounding error 252 * will only cause a minor amount of excess blur in the chroma planes. 
253 */ 254static void generate_half_size_image(const uint8_t *src_data, int src_linesize, 255 uint8_t *dst_data, int dst_linesize, 256 int src_w, int src_h, 257 int *max_mask_size) 258{ 259 int x, y; 260 261 /* Copy over the image data, using the average of 4 pixels for to 262 * calculate each downsampled pixel. */ 263 for (y = 0; y < src_h/2; y++) { 264 for (x = 0; x < src_w/2; x++) { 265 /* Set the pixel if there exists a non-zero value in the 266 * source pixels, else clear it. */ 267 dst_data[(y * dst_linesize) + x] = 268 src_data[((y << 1) * src_linesize) + (x << 1)] || 269 src_data[((y << 1) * src_linesize) + (x << 1) + 1] || 270 src_data[(((y << 1) + 1) * src_linesize) + (x << 1)] || 271 src_data[(((y << 1) + 1) * src_linesize) + (x << 1) + 1]; 272 dst_data[(y * dst_linesize) + x] = FFMIN(1, dst_data[(y * dst_linesize) + x]); 273 } 274 } 275 276 convert_mask_to_strength_mask(dst_data, dst_linesize, 277 src_w/2, src_h/2, 0, max_mask_size); 278} 279 280static av_cold int init(AVFilterContext *ctx) 281{ 282 RemovelogoContext *s = ctx->priv; 283 int ***mask; 284 int ret = 0; 285 int a, b, c, w, h; 286 int full_max_mask_size, half_max_mask_size; 287 288 if (!s->filename) { 289 av_log(ctx, AV_LOG_ERROR, "The bitmap file name is mandatory\n"); 290 return AVERROR(EINVAL); 291 } 292 293 /* Load our mask image. */ 294 if ((ret = load_mask(&s->full_mask_data, &w, &h, s->filename, ctx)) < 0) 295 return ret; 296 s->mask_w = w; 297 s->mask_h = h; 298 299 convert_mask_to_strength_mask(s->full_mask_data, w, w, h, 300 16, &full_max_mask_size); 301 302 /* Create the scaled down mask image for the chroma planes. */ 303 if (!(s->half_mask_data = av_mallocz(w/2 * h/2))) 304 return AVERROR(ENOMEM); 305 generate_half_size_image(s->full_mask_data, w, 306 s->half_mask_data, w/2, 307 w, h, &half_max_mask_size); 308 309 s->max_mask_size = FFMAX(full_max_mask_size, half_max_mask_size); 310 311 /* Create a circular mask for each size up to max_mask_size. 
When 312 the filter is applied, the mask size is determined on a pixel 313 by pixel basis, with pixels nearer the edge of the logo getting 314 smaller mask sizes. */ 315 mask = (int ***)av_malloc_array(s->max_mask_size + 1, sizeof(int **)); 316 if (!mask) 317 return AVERROR(ENOMEM); 318 319 for (a = 0; a <= s->max_mask_size; a++) { 320 mask[a] = (int **)av_malloc_array((a * 2) + 1, sizeof(int *)); 321 if (!mask[a]) { 322 av_free(mask); 323 return AVERROR(ENOMEM); 324 } 325 for (b = -a; b <= a; b++) { 326 mask[a][b + a] = (int *)av_malloc_array((a * 2) + 1, sizeof(int)); 327 if (!mask[a][b + a]) { 328 av_free(mask); 329 return AVERROR(ENOMEM); 330 } 331 for (c = -a; c <= a; c++) { 332 if ((b * b) + (c * c) <= (a * a)) /* Circular 0/1 mask. */ 333 mask[a][b + a][c + a] = 1; 334 else 335 mask[a][b + a][c + a] = 0; 336 } 337 } 338 } 339 s->mask = mask; 340 341 /* Calculate our bounding rectangles, which determine in what 342 * region the logo resides for faster processing. */ 343 ff_calculate_bounding_box(&s->full_mask_bbox, s->full_mask_data, w, w, h, 0); 344 ff_calculate_bounding_box(&s->half_mask_bbox, s->half_mask_data, w/2, w/2, h/2, 0); 345 346#define SHOW_LOGO_INFO(mask_type) \ 347 av_log(ctx, AV_LOG_VERBOSE, #mask_type " x1:%d x2:%d y1:%d y2:%d max_mask_size:%d\n", \ 348 s->mask_type##_mask_bbox.x1, s->mask_type##_mask_bbox.x2, \ 349 s->mask_type##_mask_bbox.y1, s->mask_type##_mask_bbox.y2, \ 350 mask_type##_max_mask_size); 351 SHOW_LOGO_INFO(full); 352 SHOW_LOGO_INFO(half); 353 354 return 0; 355} 356 357static int config_props_input(AVFilterLink *inlink) 358{ 359 AVFilterContext *ctx = inlink->dst; 360 RemovelogoContext *s = ctx->priv; 361 362 if (inlink->w != s->mask_w || inlink->h != s->mask_h) { 363 av_log(ctx, AV_LOG_INFO, 364 "Mask image size %dx%d does not match with the input video size %dx%d\n", 365 s->mask_w, s->mask_h, inlink->w, inlink->h); 366 return AVERROR(EINVAL); 367 } 368 369 return 0; 370} 371 372/** 373 * Blur image. 
374 * 375 * It takes a pixel that is inside the mask and blurs it. It does so 376 * by finding the average of all the pixels within the mask and 377 * outside of the mask. 378 * 379 * @param mask_data the mask plane to use for averaging 380 * @param image_data the image plane to blur 381 * @param w width of the image 382 * @param h height of the image 383 * @param x x-coordinate of the pixel to blur 384 * @param y y-coordinate of the pixel to blur 385 */ 386static unsigned int blur_pixel(int ***mask, 387 const uint8_t *mask_data, int mask_linesize, 388 uint8_t *image_data, int image_linesize, 389 int w, int h, int x, int y) 390{ 391 /* Mask size tells how large a circle to use. The radius is about 392 * (slightly larger than) mask size. */ 393 int mask_size; 394 int start_posx, start_posy, end_posx, end_posy; 395 int i, j; 396 unsigned int accumulator = 0, divisor = 0; 397 /* What pixel we are reading out of the circular blur mask. */ 398 const uint8_t *image_read_position; 399 /* What pixel we are reading out of the filter image. */ 400 const uint8_t *mask_read_position; 401 402 /* Prepare our bounding rectangle and clip it if need be. */ 403 mask_size = mask_data[y * mask_linesize + x]; 404 start_posx = FFMAX(0, x - mask_size); 405 start_posy = FFMAX(0, y - mask_size); 406 end_posx = FFMIN(w - 1, x + mask_size); 407 end_posy = FFMIN(h - 1, y + mask_size); 408 409 image_read_position = image_data + image_linesize * start_posy + start_posx; 410 mask_read_position = mask_data + mask_linesize * start_posy + start_posx; 411 412 for (j = start_posy; j <= end_posy; j++) { 413 for (i = start_posx; i <= end_posx; i++) { 414 /* Check if this pixel is in the mask or not. Only use the 415 * pixel if it is not. 
*/ 416 if (!(*mask_read_position) && mask[mask_size][i - start_posx][j - start_posy]) { 417 accumulator += *image_read_position; 418 divisor++; 419 } 420 421 image_read_position++; 422 mask_read_position++; 423 } 424 425 image_read_position += (image_linesize - ((end_posx + 1) - start_posx)); 426 mask_read_position += (mask_linesize - ((end_posx + 1) - start_posx)); 427 } 428 429 /* If divisor is 0, it means that not a single pixel is outside of 430 the logo, so we have no data. Else we need to normalise the 431 data using the divisor. */ 432 return divisor == 0 ? 255: 433 (accumulator + (divisor / 2)) / divisor; /* divide, taking into account average rounding error */ 434} 435 436/** 437 * Blur image plane using a mask. 438 * 439 * @param source The image to have it's logo removed. 440 * @param destination Where the output image will be stored. 441 * @param source_stride How far apart (in memory) two consecutive lines are. 442 * @param destination Same as source_stride, but for the destination image. 443 * @param width Width of the image. This is the same for source and destination. 444 * @param height Height of the image. This is the same for source and destination. 445 * @param is_image_direct If the image is direct, then source and destination are 446 * the same and we can save a lot of time by not copying pixels that 447 * haven't changed. 448 * @param filter The image that stores the distance to the edge of the logo for 449 * each pixel. 450 * @param logo_start_x smallest x-coordinate that contains at least 1 logo pixel. 451 * @param logo_start_y smallest y-coordinate that contains at least 1 logo pixel. 452 * @param logo_end_x largest x-coordinate that contains at least 1 logo pixel. 453 * @param logo_end_y largest y-coordinate that contains at least 1 logo pixel. 454 * 455 * This function processes an entire plane. 
Pixels outside of the logo are copied 456 * to the output without change, and pixels inside the logo have the de-blurring 457 * function applied. 458 */ 459static void blur_image(int ***mask, 460 const uint8_t *src_data, int src_linesize, 461 uint8_t *dst_data, int dst_linesize, 462 const uint8_t *mask_data, int mask_linesize, 463 int w, int h, int direct, 464 FFBoundingBox *bbox) 465{ 466 int x, y; 467 uint8_t *dst_line; 468 const uint8_t *src_line; 469 470 if (!direct) 471 av_image_copy_plane(dst_data, dst_linesize, src_data, src_linesize, w, h); 472 473 for (y = bbox->y1; y <= bbox->y2; y++) { 474 src_line = src_data + src_linesize * y; 475 dst_line = dst_data + dst_linesize * y; 476 477 for (x = bbox->x1; x <= bbox->x2; x++) { 478 if (mask_data[y * mask_linesize + x]) { 479 /* Only process if we are in the mask. */ 480 dst_line[x] = blur_pixel(mask, 481 mask_data, mask_linesize, 482 dst_data, dst_linesize, 483 w, h, x, y); 484 } else { 485 /* Else just copy the data. */ 486 if (!direct) 487 dst_line[x] = src_line[x]; 488 } 489 } 490 } 491} 492 493static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref) 494{ 495 RemovelogoContext *s = inlink->dst->priv; 496 AVFilterLink *outlink = inlink->dst->outputs[0]; 497 AVFrame *outpicref; 498 int direct = 0; 499 500 if (av_frame_is_writable(inpicref)) { 501 direct = 1; 502 outpicref = inpicref; 503 } else { 504 outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h); 505 if (!outpicref) { 506 av_frame_free(&inpicref); 507 return AVERROR(ENOMEM); 508 } 509 av_frame_copy_props(outpicref, inpicref); 510 } 511 512 blur_image(s->mask, 513 inpicref ->data[0], inpicref ->linesize[0], 514 outpicref->data[0], outpicref->linesize[0], 515 s->full_mask_data, inlink->w, 516 inlink->w, inlink->h, direct, &s->full_mask_bbox); 517 blur_image(s->mask, 518 inpicref ->data[1], inpicref ->linesize[1], 519 outpicref->data[1], outpicref->linesize[1], 520 s->half_mask_data, inlink->w/2, 521 inlink->w/2, inlink->h/2, direct, 
&s->half_mask_bbox); 522 blur_image(s->mask, 523 inpicref ->data[2], inpicref ->linesize[2], 524 outpicref->data[2], outpicref->linesize[2], 525 s->half_mask_data, inlink->w/2, 526 inlink->w/2, inlink->h/2, direct, &s->half_mask_bbox); 527 528 if (!direct) 529 av_frame_free(&inpicref); 530 531 return ff_filter_frame(outlink, outpicref); 532} 533 534static av_cold void uninit(AVFilterContext *ctx) 535{ 536 RemovelogoContext *s = ctx->priv; 537 int a, b; 538 539 av_freep(&s->full_mask_data); 540 av_freep(&s->half_mask_data); 541 542 if (s->mask) { 543 /* Loop through each mask. */ 544 for (a = 0; a <= s->max_mask_size; a++) { 545 /* Loop through each scanline in a mask. */ 546 for (b = -a; b <= a; b++) { 547 av_freep(&s->mask[a][b + a]); /* Free a scanline. */ 548 } 549 av_freep(&s->mask[a]); 550 } 551 /* Free the array of pointers pointing to the masks. */ 552 av_freep(&s->mask); 553 } 554} 555 556static const AVFilterPad removelogo_inputs[] = { 557 { 558 .name = "default", 559 .type = AVMEDIA_TYPE_VIDEO, 560 .config_props = config_props_input, 561 .filter_frame = filter_frame, 562 }, 563 { NULL } 564}; 565 566static const AVFilterPad removelogo_outputs[] = { 567 { 568 .name = "default", 569 .type = AVMEDIA_TYPE_VIDEO, 570 }, 571 { NULL } 572}; 573 574AVFilter ff_vf_removelogo = { 575 .name = "removelogo", 576 .description = NULL_IF_CONFIG_SMALL("Remove a TV logo based on a mask image."), 577 .priv_size = sizeof(RemovelogoContext), 578 .init = init, 579 .uninit = uninit, 580 .query_formats = query_formats, 581 .inputs = removelogo_inputs, 582 .outputs = removelogo_outputs, 583 .priv_class = &removelogo_class, 584 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, 585}; 586