1/*
2 * Copyright (c) 2005 Robert Edele <yartrebo@earthlink.net>
3 * Copyright (c) 2012 Stefano Sabatini
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22/**
23 * @file
24 * Advanced blur-based logo removing filter
25 *
26 * This filter loads an image mask file showing where a logo is and
27 * uses a blur transform to remove the logo.
28 *
29 * Based on the libmpcodecs remove-logo filter by Robert Edele.
30 */
31
32/**
33 * This code implements a filter to remove annoying TV logos and other annoying
34 * images placed onto a video stream. It works by filling in the pixels that
35 * comprise the logo with neighboring pixels. The transform is very loosely
36 * based on a gaussian blur, but it is different enough to merit its own
37 * paragraph later on. It is a major improvement on the old delogo filter as it
38 * both uses a better blurring algorithm and uses a bitmap to use an arbitrary
39 * and generally much tighter fitting shape than a rectangle.
40 *
41 * The logo removal algorithm has two key points. The first is that it
42 * distinguishes between pixels in the logo and those not in the logo by using
43 * the passed-in bitmap. Pixels not in the logo are copied over directly without
44 * being modified and they also serve as source pixels for the logo
45 * fill-in. Pixels inside the logo have the mask applied.
46 *
47 * At init-time the bitmap is reprocessed internally, and the distance to the
48 * nearest edge of the logo (Manhattan distance), along with a little extra to
49 * remove rough edges, is stored in each pixel. This is done using an in-place
50 * erosion algorithm, and incrementing each pixel that survives any given
51 * erosion.  Once every pixel is eroded, the maximum value is recorded, and a
 * set of masks from size 0 to this size are generated. The masks are circular
53 * binary masks, where each pixel within a radius N (where N is the size of the
54 * mask) is a 1, and all other pixels are a 0. Although a gaussian mask would be
55 * more mathematically accurate, a binary mask works better in practice because
56 * we generally do not use the central pixels in the mask (because they are in
57 * the logo region), and thus a gaussian mask will cause too little blur and
58 * thus a very unstable image.
59 *
60 * The mask is applied in a special way. Namely, only pixels in the mask that
61 * line up to pixels outside the logo are used. The dynamic mask size means that
62 * the mask is just big enough so that the edges touch pixels outside the logo,
63 * so the blurring is kept to a minimum and at least the first boundary
64 * condition is met (that the image function itself is continuous), even if the
65 * second boundary condition (that the derivative of the image function is
66 * continuous) is not met. A masking algorithm that does preserve the second
 * boundary condition (perhaps something based on a highly-modified bi-cubic
68 * algorithm) should offer even better results on paper, but the noise in a
69 * typical TV signal should make anything based on derivatives hopelessly noisy.
70 */
71
72#include "libavutil/imgutils.h"
73#include "libavutil/opt.h"
74#include "avfilter.h"
75#include "formats.h"
76#include "internal.h"
77#include "video.h"
78#include "bbox.h"
79#include "lavfutils.h"
80#include "lswsutils.h"
81
/** Private context of the removelogo filter. */
typedef struct {
    const AVClass *class;
    char *filename;     /* mask bitmap path, set by the "filename" / "f" option */
    /* Stores our collection of circular masks. The first index selects
       the mask by its size, the second and third index one cell of
       that mask's (2 * size + 1) x (2 * size + 1) grid. The masks are
       symmetric, so the order of the last two indices does not matter. */
    int ***mask;
    int max_mask_size;  /* index of the largest mask stored in mask[] */
    int mask_w, mask_h; /* dimensions of the loaded mask image */

    uint8_t      *full_mask_data; /* full-size strength mask, used for plane 0 */
    FFBoundingBox full_mask_bbox; /* bounding box computed from full_mask_data */
    uint8_t      *half_mask_data; /* half-size strength mask, used for planes 1 and 2 */
    FFBoundingBox half_mask_bbox; /* bounding box computed from half_mask_data */
} RemovelogoContext;
96
97#define OFFSET(x) offsetof(RemovelogoContext, x)
98#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
99static const AVOption removelogo_options[] = {
100    { "filename", "set bitmap filename", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
101    { "f",        "set bitmap filename", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
102    { NULL }
103};
104
105AVFILTER_DEFINE_CLASS(removelogo);
106
/**
 * Choose a slightly larger mask size to improve performance.
 *
 * This function maps the absolute minimum mask size needed to the
 * mask size we'll actually use. f(x) = x (the smallest that will
 * work) will produce the sharpest results, but will be quite
 * jittery. f(x) = 1.25x (what I'm using) is a good tradeoff in my
 * opinion. This will calculate only at init-time, so you can put a
 * long expression here without affecting performance.
 *
 * Every use of the argument is parenthesized so that any expression
 * can be passed. The argument is evaluated twice, so it must not have
 * side effects.
 */
#define apply_mask_fudge_factor(x) (((x) >> 2) + (x))

/**
 * Pre-process an image to give distance information.
 *
 * This function takes a bitmap image and converts it in place into a
 * distance image. A distance image is zero for pixels outside of the
 * logo and is the Manhattan distance (|dx| + |dy|) from the logo edge
 * for pixels inside of the logo. This will overestimate the distance,
 * but that is safe, and is far easier to implement than a proper
 * pythagorean distance since I'm using a modified erosion algorithm
 * to compute the distances.
 *
 * The stored distances are 8-bit: both the erosion counter and the
 * fudge-factored result saturate at UINT8_MAX instead of wrapping
 * around, so pixels deep inside a very large logo keep the largest
 * representable strength rather than falling back to a small one.
 *
 * @param data          image which will be converted from a greyscale
 *                      image into a distance image
 * @param linesize      distance in bytes between two rows of data
 * @param w             width of the image in pixels
 * @param h             height of the image in pixels
 * @param min_val       pixels with a value greater than this are part
 *                      of the logo, all other pixels are outside of it
 * @param max_mask_size set to the fudge-factored number of erosion
 *                      passes plus one, which is an upper bound for
 *                      every value stored in data
 */
static void convert_mask_to_strength_mask(uint8_t *data, int linesize,
                                          int w, int h, int min_val,
                                          int *max_mask_size)
{
    int x, y;

    /* How many times we've gone through the loop. Used in the
       in-place erosion algorithm and to get us max_mask_size later on. */
    int current_pass = 0;

    /* set all values above the threshold to 1, all others to 0 */
    for (y = 0; y < h; y++)
        for (x = 0; x < w; x++)
            data[y*linesize + x] = data[y*linesize + x] > min_val;

    /* For each pass, if a pixel is itself the same value as the
       current pass, and its four neighbors are too, then it is
       incremented. If no pixels are incremented by the end of the
       pass, then we are done. Edge pixels are never incremented (they
       should be outside of the logo for any sane mask, but if they
       are not this ensures that we eventually exit). */
    while (1) {
        /* If this doesn't get set by the end of this pass, then we're done. */
        int has_anything_changed = 0;
        current_pass++;

        for (y = 1; y < h - 1; y++) {
            /* Derived per row, so no pointer is ever formed for
             * images that have no interior pixels. */
            uint8_t *row = data + y * linesize;

            for (x = 1; x < w - 1; x++) {
                uint8_t *current_pixel = row + x;

                /* Apply the in-place erosion transform. It is based
                   on the following two premises:
                   1 - Any pixel that fails 1 erosion will fail all
                       future erosions.

                   2 - Only pixels having survived all erosions up to
                       the present will be >= to current_pass.
                   It doesn't matter if it survived the current pass,
                   failed it, or hasn't been tested yet.  By using >=
                   instead of ==, we allow the algorithm to work in
                   place.

                   A pixel that already holds UINT8_MAX is left alone:
                   incrementing it would wrap around to 0. */
                if (*current_pixel              <  UINT8_MAX    &&
                    *current_pixel              >= current_pass &&
                    *(current_pixel + 1)        >= current_pass &&
                    *(current_pixel - 1)        >= current_pass &&
                    *(current_pixel + linesize) >= current_pass &&
                    *(current_pixel - linesize) >= current_pass) {
                    /* Increment the value since it still has not been
                     * eroded, as evidenced by the if statement that
                     * just evaluated to true. */
                    (*current_pixel)++;
                    has_anything_changed = 1;
                }
            }
        }
        if (!has_anything_changed)
            break;
    }

    /* Apply the fudge factor, which will increase the size of the
     * mask a little to reduce jitter at the cost of more blur. The
     * result is clamped because it is stored back into 8 bits. */
    for (y = 1; y < h - 1; y++) {
        for (x = 1; x < w - 1; x++) {
            const int distance = data[(y * linesize) + x];
            const int strength = apply_mask_fudge_factor(distance);

            data[(y * linesize) + x] = strength > UINT8_MAX ? UINT8_MAX : strength;
        }
    }

    /* As a side-effect, we now know the maximum mask size, which
     * we'll use to generate our masks. */
    /* Apply the fudge factor to this number too, since we must ensure
     * that enough masks are generated. */
    *max_mask_size = apply_mask_fudge_factor(current_pass + 1);
}
205
206static int query_formats(AVFilterContext *ctx)
207{
208    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE };
209    ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
210    return 0;
211}
212
213static int load_mask(uint8_t **mask, int *w, int *h,
214                     const char *filename, void *log_ctx)
215{
216    int ret;
217    enum AVPixelFormat pix_fmt;
218    uint8_t *src_data[4], *gray_data[4];
219    int src_linesize[4], gray_linesize[4];
220
221    /* load image from file */
222    if ((ret = ff_load_image(src_data, src_linesize, w, h, &pix_fmt, filename, log_ctx)) < 0)
223        return ret;
224
225    /* convert the image to GRAY8 */
226    if ((ret = ff_scale_image(gray_data, gray_linesize, *w, *h, AV_PIX_FMT_GRAY8,
227                              src_data, src_linesize, *w, *h, pix_fmt,
228                              log_ctx)) < 0)
229        goto end;
230
231    /* copy mask to a newly allocated array */
232    *mask = av_malloc(*w * *h);
233    if (!*mask)
234        ret = AVERROR(ENOMEM);
235    av_image_copy_plane(*mask, *w, gray_data[0], gray_linesize[0], *w, *h);
236
237end:
238    av_freep(&src_data[0]);
239    av_freep(&gray_data[0]);
240    return ret;
241}
242
/**
 * Generate the half-size strength mask from the full-size one.
 *
 * Each destination pixel covers a 2x2 block of source pixels. It is
 * first set to 1 if any of the four source pixels is non-zero and to 0
 * otherwise; the resulting binary image is then turned into a strength
 * (distance) image of its own. Marking a destination pixel as soon as
 * one source pixel is set errs on the side of a slightly larger mask.
 *
 * @param src_data      full-size mask
 * @param src_linesize  distance in bytes between two rows of src_data
 * @param dst_data      receives the half-size strength mask
 * @param dst_linesize  distance in bytes between two rows of dst_data
 * @param src_w         width of the source mask in pixels
 * @param src_h         height of the source mask in pixels
 * @param max_mask_size set to the maximum mask size computed for the
 *                      half-size mask
 */
static void generate_half_size_image(const uint8_t *src_data, int src_linesize,
                                     uint8_t *dst_data, int dst_linesize,
                                     int src_w, int src_h,
                                     int *max_mask_size)
{
    const int half_w = src_w / 2;
    const int half_h = src_h / 2;
    int row, col;

    for (row = 0; row < half_h; row++) {
        /* The two source rows feeding this destination row. */
        const uint8_t *upper = src_data + (2 * row) * src_linesize;
        const uint8_t *lower = upper + src_linesize;
        uint8_t *out = dst_data + row * dst_linesize;

        for (col = 0; col < half_w; col++) {
            const int left = 2 * col;

            /* 1 if any pixel of the 2x2 block is set, else 0. */
            out[col] = upper[left] || upper[left + 1] ||
                       lower[left] || lower[left + 1];
        }
    }

    convert_mask_to_strength_mask(dst_data, dst_linesize,
                                  half_w, half_h, 0, max_mask_size);
}
279
280static av_cold int init(AVFilterContext *ctx)
281{
282    RemovelogoContext *s = ctx->priv;
283    int ***mask;
284    int ret = 0;
285    int a, b, c, w, h;
286    int full_max_mask_size, half_max_mask_size;
287
288    if (!s->filename) {
289        av_log(ctx, AV_LOG_ERROR, "The bitmap file name is mandatory\n");
290        return AVERROR(EINVAL);
291    }
292
293    /* Load our mask image. */
294    if ((ret = load_mask(&s->full_mask_data, &w, &h, s->filename, ctx)) < 0)
295        return ret;
296    s->mask_w = w;
297    s->mask_h = h;
298
299    convert_mask_to_strength_mask(s->full_mask_data, w, w, h,
300                                  16, &full_max_mask_size);
301
302    /* Create the scaled down mask image for the chroma planes. */
303    if (!(s->half_mask_data = av_mallocz(w/2 * h/2)))
304        return AVERROR(ENOMEM);
305    generate_half_size_image(s->full_mask_data, w,
306                             s->half_mask_data, w/2,
307                             w, h, &half_max_mask_size);
308
309    s->max_mask_size = FFMAX(full_max_mask_size, half_max_mask_size);
310
311    /* Create a circular mask for each size up to max_mask_size. When
312       the filter is applied, the mask size is determined on a pixel
313       by pixel basis, with pixels nearer the edge of the logo getting
314       smaller mask sizes. */
315    mask = (int ***)av_malloc_array(s->max_mask_size + 1, sizeof(int **));
316    if (!mask)
317        return AVERROR(ENOMEM);
318
319    for (a = 0; a <= s->max_mask_size; a++) {
320        mask[a] = (int **)av_malloc_array((a * 2) + 1, sizeof(int *));
321        if (!mask[a]) {
322            av_free(mask);
323            return AVERROR(ENOMEM);
324        }
325        for (b = -a; b <= a; b++) {
326            mask[a][b + a] = (int *)av_malloc_array((a * 2) + 1, sizeof(int));
327            if (!mask[a][b + a]) {
328                av_free(mask);
329                return AVERROR(ENOMEM);
330            }
331            for (c = -a; c <= a; c++) {
332                if ((b * b) + (c * c) <= (a * a)) /* Circular 0/1 mask. */
333                    mask[a][b + a][c + a] = 1;
334                else
335                    mask[a][b + a][c + a] = 0;
336            }
337        }
338    }
339    s->mask = mask;
340
341    /* Calculate our bounding rectangles, which determine in what
342     * region the logo resides for faster processing. */
343    ff_calculate_bounding_box(&s->full_mask_bbox, s->full_mask_data, w, w, h, 0);
344    ff_calculate_bounding_box(&s->half_mask_bbox, s->half_mask_data, w/2, w/2, h/2, 0);
345
346#define SHOW_LOGO_INFO(mask_type)                                       \
347    av_log(ctx, AV_LOG_VERBOSE, #mask_type " x1:%d x2:%d y1:%d y2:%d max_mask_size:%d\n", \
348           s->mask_type##_mask_bbox.x1, s->mask_type##_mask_bbox.x2, \
349           s->mask_type##_mask_bbox.y1, s->mask_type##_mask_bbox.y2, \
350           mask_type##_max_mask_size);
351    SHOW_LOGO_INFO(full);
352    SHOW_LOGO_INFO(half);
353
354    return 0;
355}
356
357static int config_props_input(AVFilterLink *inlink)
358{
359    AVFilterContext *ctx = inlink->dst;
360    RemovelogoContext *s = ctx->priv;
361
362    if (inlink->w != s->mask_w || inlink->h != s->mask_h) {
363        av_log(ctx, AV_LOG_INFO,
364               "Mask image size %dx%d does not match with the input video size %dx%d\n",
365               s->mask_w, s->mask_h, inlink->w, inlink->h);
366        return AVERROR(EINVAL);
367    }
368
369    return 0;
370}
371
/**
 * Blur image.
 *
 * It takes a pixel that is inside the logo and blurs it. It does so
 * by averaging all the pixels that are inside the circular mask
 * centered on that pixel and outside of the logo.
 *
 * @param mask           table of circular 0/1 masks, indexed by mask
 *                       size and then by the two offsets from the
 *                       top-left corner of the mask
 * @param mask_data      the strength mask: zero for pixels outside of
 *                       the logo, otherwise the mask size to use
 * @param mask_linesize  distance in bytes between two rows of mask_data
 * @param image_data     the image plane to read the pixels from
 * @param image_linesize distance in bytes between two rows of image_data
 * @param w width of the image
 * @param h height of the image
 * @param x x-coordinate of the pixel to blur
 * @param y y-coordinate of the pixel to blur
 * @return the rounded average of the pixels used, or 255 if there is
 *         not a single usable pixel
 */
static unsigned int blur_pixel(int ***mask,
                               const uint8_t *mask_data, int mask_linesize,
                               uint8_t       *image_data, int image_linesize,
                               int w, int h, int x, int y)
{
    /* Mask size tells how large a circle to use. The radius is about
     * (slightly larger than) mask size. */
    const int mask_size = mask_data[y * mask_linesize + x];
    /* Top-left corner of the unclipped window; it may lie outside of
     * the image. Cell (0, 0) of the circular mask corresponds to it. */
    const int origin_x = x - mask_size;
    const int origin_y = y - mask_size;
    /* The window clipped against the image borders. */
    const int start_posx = origin_x < 0 ? 0 : origin_x;
    const int start_posy = origin_y < 0 ? 0 : origin_y;
    const int end_posx   = x + mask_size > w - 1 ? w - 1 : x + mask_size;
    const int end_posy   = y + mask_size > h - 1 ? h - 1 : y + mask_size;
    unsigned int accumulator = 0, divisor = 0;
    int i, j;

    for (j = start_posy; j <= end_posy; j++) {
        /* Current row of the image plane. */
        const uint8_t *image_row = image_data + image_linesize * j;
        /* Current row of the strength mask. */
        const uint8_t *mask_row  = mask_data  + mask_linesize  * j;

        for (i = start_posx; i <= end_posx; i++) {
            /* Only use pixels that are outside of the logo and inside
             * of the circle. The circular mask is indexed relative to
             * the unclipped origin, so that the circle stays centered
             * on (x, y) even when the window is clipped at the left or
             * top border of the image. */
            if (!mask_row[i] && mask[mask_size][i - origin_x][j - origin_y]) {
                accumulator += image_row[i];
                divisor++;
            }
        }
    }

    /* If divisor is 0, it means that not a single pixel is outside of
       the logo, so we have no data.  Else we need to normalise the
       data using the divisor. */
    return divisor == 0 ? 255:
        (accumulator + (divisor / 2)) / divisor;  /* divide, taking into account average rounding error */
}
435
436/**
437 * Blur image plane using a mask.
438 *
439 * @param source The image to have it's logo removed.
440 * @param destination Where the output image will be stored.
441 * @param source_stride How far apart (in memory) two consecutive lines are.
442 * @param destination Same as source_stride, but for the destination image.
443 * @param width Width of the image. This is the same for source and destination.
444 * @param height Height of the image. This is the same for source and destination.
445 * @param is_image_direct If the image is direct, then source and destination are
446 *        the same and we can save a lot of time by not copying pixels that
447 *        haven't changed.
448 * @param filter The image that stores the distance to the edge of the logo for
449 *        each pixel.
450 * @param logo_start_x smallest x-coordinate that contains at least 1 logo pixel.
451 * @param logo_start_y smallest y-coordinate that contains at least 1 logo pixel.
452 * @param logo_end_x   largest x-coordinate that contains at least 1 logo pixel.
453 * @param logo_end_y   largest y-coordinate that contains at least 1 logo pixel.
454 *
455 * This function processes an entire plane. Pixels outside of the logo are copied
456 * to the output without change, and pixels inside the logo have the de-blurring
457 * function applied.
458 */
459static void blur_image(int ***mask,
460                       const uint8_t *src_data,  int src_linesize,
461                             uint8_t *dst_data,  int dst_linesize,
462                       const uint8_t *mask_data, int mask_linesize,
463                       int w, int h, int direct,
464                       FFBoundingBox *bbox)
465{
466    int x, y;
467    uint8_t *dst_line;
468    const uint8_t *src_line;
469
470    if (!direct)
471        av_image_copy_plane(dst_data, dst_linesize, src_data, src_linesize, w, h);
472
473    for (y = bbox->y1; y <= bbox->y2; y++) {
474        src_line = src_data + src_linesize * y;
475        dst_line = dst_data + dst_linesize * y;
476
477        for (x = bbox->x1; x <= bbox->x2; x++) {
478             if (mask_data[y * mask_linesize + x]) {
479                /* Only process if we are in the mask. */
480                 dst_line[x] = blur_pixel(mask,
481                                          mask_data, mask_linesize,
482                                          dst_data, dst_linesize,
483                                          w, h, x, y);
484            } else {
485                /* Else just copy the data. */
486                if (!direct)
487                    dst_line[x] = src_line[x];
488            }
489        }
490    }
491}
492
493static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
494{
495    RemovelogoContext *s = inlink->dst->priv;
496    AVFilterLink *outlink = inlink->dst->outputs[0];
497    AVFrame *outpicref;
498    int direct = 0;
499
500    if (av_frame_is_writable(inpicref)) {
501        direct = 1;
502        outpicref = inpicref;
503    } else {
504        outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h);
505        if (!outpicref) {
506            av_frame_free(&inpicref);
507            return AVERROR(ENOMEM);
508        }
509        av_frame_copy_props(outpicref, inpicref);
510    }
511
512    blur_image(s->mask,
513               inpicref ->data[0], inpicref ->linesize[0],
514               outpicref->data[0], outpicref->linesize[0],
515               s->full_mask_data, inlink->w,
516               inlink->w, inlink->h, direct, &s->full_mask_bbox);
517    blur_image(s->mask,
518               inpicref ->data[1], inpicref ->linesize[1],
519               outpicref->data[1], outpicref->linesize[1],
520               s->half_mask_data, inlink->w/2,
521               inlink->w/2, inlink->h/2, direct, &s->half_mask_bbox);
522    blur_image(s->mask,
523               inpicref ->data[2], inpicref ->linesize[2],
524               outpicref->data[2], outpicref->linesize[2],
525               s->half_mask_data, inlink->w/2,
526               inlink->w/2, inlink->h/2, direct, &s->half_mask_bbox);
527
528    if (!direct)
529        av_frame_free(&inpicref);
530
531    return ff_filter_frame(outlink, outpicref);
532}
533
534static av_cold void uninit(AVFilterContext *ctx)
535{
536    RemovelogoContext *s = ctx->priv;
537    int a, b;
538
539    av_freep(&s->full_mask_data);
540    av_freep(&s->half_mask_data);
541
542    if (s->mask) {
543        /* Loop through each mask. */
544        for (a = 0; a <= s->max_mask_size; a++) {
545            /* Loop through each scanline in a mask. */
546            for (b = -a; b <= a; b++) {
547                av_freep(&s->mask[a][b + a]); /* Free a scanline. */
548            }
549            av_freep(&s->mask[a]);
550        }
551        /* Free the array of pointers pointing to the masks. */
552        av_freep(&s->mask);
553    }
554}
555
/* Single video input pad: config_props_input() checks the frame size
 * against the mask, filter_frame() processes each frame. */
static const AVFilterPad removelogo_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_props_input,
        .filter_frame = filter_frame,
    },
    { NULL }
};
565
/* Single video output pad, without any callbacks of its own. */
static const AVFilterPad removelogo_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
    },
    { NULL }
};
573
/* Filter definition tying together the private context, the callbacks
 * and the pads declared in this file. */
AVFilter ff_vf_removelogo = {
    .name          = "removelogo",
    .description   = NULL_IF_CONFIG_SMALL("Remove a TV logo based on a mask image."),
    .priv_size     = sizeof(RemovelogoContext),
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .inputs        = removelogo_inputs,
    .outputs       = removelogo_outputs,
    .priv_class    = &removelogo_class,
    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
};
586