1/* 2 * Copyright (c) 2013 Georg Martius <georg dot martius at web dot de> 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#define DEFAULT_INPUT_NAME "transforms.trf" 22 23#include <vid.stab/libvidstab.h> 24 25#include "libavutil/common.h" 26#include "libavutil/opt.h" 27#include "libavutil/imgutils.h" 28#include "avfilter.h" 29#include "internal.h" 30 31#include "vidstabutils.h" 32 33typedef struct { 34 const AVClass *class; 35 36 VSTransformData td; 37 VSTransformConfig conf; 38 39 VSTransformations trans; // transformations 40 char *input; // name of transform file 41 int tripod; 42 int debug; 43} TransformContext; 44 45#define OFFSET(x) offsetof(TransformContext, x) 46#define OFFSETC(x) (offsetof(TransformContext, conf)+offsetof(VSTransformConfig, x)) 47#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM 48 49static const AVOption vidstabtransform_options[] = { 50 {"input", "set path to the file storing the transforms", OFFSET(input), 51 AV_OPT_TYPE_STRING, {.str = DEFAULT_INPUT_NAME}, .flags = FLAGS }, 52 {"smoothing", "set number of frames*2 + 1 used for lowpass filtering", OFFSETC(smoothing), 53 AV_OPT_TYPE_INT, {.i64 = 15}, 0, 1000, FLAGS}, 54 55 {"optalgo", "set camera path optimization algo", OFFSETC(camPathAlgo), 56 AV_OPT_TYPE_INT, {.i64 = VSOptimalL1}, VSOptimalL1, VSAvg, FLAGS, "optalgo"}, 57 { "opt", "global optimization", 0, // from version 1.0 on 58 AV_OPT_TYPE_CONST, {.i64 = VSOptimalL1 }, 0, 0, FLAGS, "optalgo"}, 59 { "gauss", "gaussian kernel", 0, 60 AV_OPT_TYPE_CONST, {.i64 = VSGaussian }, 0, 0, FLAGS, "optalgo"}, 61 { "avg", "simple averaging on motion", 0, 62 AV_OPT_TYPE_CONST, {.i64 = VSAvg }, 0, 0, FLAGS, "optalgo"}, 63 64 {"maxshift", "set maximal number of pixels to translate image", OFFSETC(maxShift), 65 AV_OPT_TYPE_INT, {.i64 = -1}, -1, 500, FLAGS}, 66 {"maxangle", "set maximal angle in rad to rotate image", OFFSETC(maxAngle), 67 AV_OPT_TYPE_DOUBLE, {.dbl = -1.0}, -1.0, 3.14, FLAGS}, 68 69 {"crop", "set cropping mode", OFFSETC(crop), 70 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS, "crop"}, 71 { "keep", "keep border", 0, 72 AV_OPT_TYPE_CONST, {.i64 = VSKeepBorder }, 0, 0, FLAGS, "crop"}, 73 { "black", "black border", 0, 74 AV_OPT_TYPE_CONST, {.i64 = VSCropBorder }, 0, 0, FLAGS, "crop"}, 75 76 {"invert", "invert transforms", OFFSETC(invert), 77 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS}, 78 {"relative", "consider transforms as relative", OFFSETC(relative), 79 AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, FLAGS}, 80 {"zoom", "set percentage to zoom (>0: zoom in, <0: zoom out", OFFSETC(zoom), 81 AV_OPT_TYPE_DOUBLE, {.dbl = 0}, -100, 100, FLAGS}, 82 {"optzoom", "set optimal zoom (0: nothing, 1: optimal static zoom, 2: optimal dynamic zoom)", OFFSETC(optZoom), 83 AV_OPT_TYPE_INT, {.i64 = 1}, 0, 2, FLAGS}, 84 {"zoomspeed", "for adative zoom: percent to zoom maximally each frame", OFFSETC(zoomSpeed), 85 AV_OPT_TYPE_DOUBLE, {.dbl = 0.25}, 0, 5, FLAGS}, 86 87 {"interpol", "set type of interpolation", OFFSETC(interpolType), 88 AV_OPT_TYPE_INT, {.i64 = 2}, 0, 3, FLAGS, "interpol"}, 89 { "no", "no interpolation", 0, 90 AV_OPT_TYPE_CONST, {.i64 = VS_Zero }, 0, 0, FLAGS, "interpol"}, 91 { "linear", "linear (horizontal)", 0, 92 AV_OPT_TYPE_CONST, {.i64 = VS_Linear }, 0, 0, FLAGS, "interpol"}, 93 { "bilinear","bi-linear", 0, 94 AV_OPT_TYPE_CONST, {.i64 = VS_BiLinear},0, 0, FLAGS, "interpol"}, 95 { "bicubic", "bi-cubic", 0, 96 AV_OPT_TYPE_CONST, {.i64 = VS_BiCubic },0, 0, FLAGS, "interpol"}, 97 98 {"tripod", "enable virtual tripod mode (same as relative=0:smoothing=0)", OFFSET(tripod), 99 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS}, 100 {"debug", "enable debug mode and writer global motions information to file", OFFSET(debug), 101 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS}, 102 {NULL} 103}; 104 105AVFILTER_DEFINE_CLASS(vidstabtransform); 106 107static av_cold int init(AVFilterContext *ctx) 108{ 109 TransformContext *tc = ctx->priv; 110 vs_set_mem_and_log_functions(); 111 tc->class = &vidstabtransform_class; 112 av_log(ctx, AV_LOG_VERBOSE, "vidstabtransform filter: init %s\n", LIBVIDSTAB_VERSION); 113 return 0; 114} 115 116static av_cold void uninit(AVFilterContext *ctx) 117{ 118 TransformContext *tc = ctx->priv; 119 120 vsTransformDataCleanup(&tc->td); 121 vsTransformationsCleanup(&tc->trans); 122} 123 124static int query_formats(AVFilterContext *ctx) 125{ 126 // If you add something here also add it in vidstabutils.c 127 static const enum AVPixelFormat pix_fmts[] = { 128 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P, 129 AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUVA420P, 130 AV_PIX_FMT_YUV440P, AV_PIX_FMT_GRAY8, 131 AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, AV_PIX_FMT_RGBA, 132 AV_PIX_FMT_NONE 133 }; 134 135 ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); 136 return 0; 137} 138 139 140static int config_input(AVFilterLink *inlink) 141{ 142 AVFilterContext *ctx = inlink->dst; 143 TransformContext *tc = ctx->priv; 144 FILE *f; 145 146 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); 147 148 VSTransformData *td = &(tc->td); 149 150 VSFrameInfo fi_src; 151 VSFrameInfo fi_dest; 152 153 if (!vsFrameInfoInit(&fi_src, inlink->w, inlink->h, 154 av_2_vs_pixel_format(ctx, inlink->format)) || 155 !vsFrameInfoInit(&fi_dest, inlink->w, inlink->h, 156 av_2_vs_pixel_format(ctx, inlink->format))) { 157 av_log(ctx, AV_LOG_ERROR, "unknown pixel format: %i (%s)", 158 inlink->format, desc->name); 159 return AVERROR(EINVAL); 160 } 161 162 if (fi_src.bytesPerPixel != av_get_bits_per_pixel(desc)/8 || 163 fi_src.log2ChromaW != desc->log2_chroma_w || 164 fi_src.log2ChromaH != desc->log2_chroma_h) { 165 av_log(ctx, AV_LOG_ERROR, "pixel-format error: bpp %i<>%i ", 166 fi_src.bytesPerPixel, av_get_bits_per_pixel(desc)/8); 167 av_log(ctx, AV_LOG_ERROR, "chroma_subsampl: w: %i<>%i h: %i<>%i\n", 168 fi_src.log2ChromaW, desc->log2_chroma_w, 169 fi_src.log2ChromaH, desc->log2_chroma_h); 170 return AVERROR(EINVAL); 171 } 172 173 // set values that are not initializes by the options 174 tc->conf.modName = "vidstabtransform"; 175 tc->conf.verbose = 1 + tc->debug; 176 if (tc->tripod) { 177 av_log(ctx, AV_LOG_INFO, "Virtual tripod mode: relative=0, smoothing=0\n"); 178 tc->conf.relative = 0; 179 tc->conf.smoothing = 0; 180 } 181 tc->conf.simpleMotionCalculation = 0; 182 tc->conf.storeTransforms = tc->debug; 183 tc->conf.smoothZoom = 0; 184 185 if (vsTransformDataInit(td, &tc->conf, &fi_src, &fi_dest) != VS_OK) { 186 av_log(ctx, AV_LOG_ERROR, "initialization of vid.stab transform failed, please report a BUG\n"); 187 return AVERROR(EINVAL); 188 } 189 190 vsTransformGetConfig(&tc->conf, td); 191 av_log(ctx, AV_LOG_INFO, "Video transformation/stabilization settings (pass 2/2):\n"); 192 av_log(ctx, AV_LOG_INFO, " input = %s\n", tc->input); 193 av_log(ctx, AV_LOG_INFO, " smoothing = %d\n", tc->conf.smoothing); 194 av_log(ctx, AV_LOG_INFO, " optalgo = %s\n", 195 tc->conf.camPathAlgo == VSOptimalL1 ? "opt" : 196 (tc->conf.camPathAlgo == VSGaussian ? "gauss" : "avg")); 197 av_log(ctx, AV_LOG_INFO, " maxshift = %d\n", tc->conf.maxShift); 198 av_log(ctx, AV_LOG_INFO, " maxangle = %f\n", tc->conf.maxAngle); 199 av_log(ctx, AV_LOG_INFO, " crop = %s\n", tc->conf.crop ? "Black" : "Keep"); 200 av_log(ctx, AV_LOG_INFO, " relative = %s\n", tc->conf.relative ? "True": "False"); 201 av_log(ctx, AV_LOG_INFO, " invert = %s\n", tc->conf.invert ? "True" : "False"); 202 av_log(ctx, AV_LOG_INFO, " zoom = %f\n", tc->conf.zoom); 203 av_log(ctx, AV_LOG_INFO, " optzoom = %s\n", 204 tc->conf.optZoom == 1 ? "Static (1)" : (tc->conf.optZoom == 2 ? "Dynamic (2)" : "Off (0)")); 205 if (tc->conf.optZoom == 2) 206 av_log(ctx, AV_LOG_INFO, " zoomspeed = %g\n", tc->conf.zoomSpeed); 207 av_log(ctx, AV_LOG_INFO, " interpol = %s\n", getInterpolationTypeName(tc->conf.interpolType)); 208 209 f = fopen(tc->input, "r"); 210 if (f == NULL) { 211 av_log(ctx, AV_LOG_ERROR, "cannot open input file %s\n", tc->input); 212 return AVERROR(errno); 213 } else { 214 VSManyLocalMotions mlms; 215 if (vsReadLocalMotionsFile(f, &mlms) == VS_OK) { 216 // calculate the actual transforms from the local motions 217 if (vsLocalmotions2Transforms(td, &mlms, &tc->trans) != VS_OK) { 218 av_log(ctx, AV_LOG_ERROR, "calculating transformations failed\n"); 219 return AVERROR(EINVAL); 220 } 221 } else { // try to read old format 222 if (!vsReadOldTransforms(td, f, &tc->trans)) { /* read input file */ 223 av_log(ctx, AV_LOG_ERROR, "error parsing input file %s\n", tc->input); 224 return AVERROR(EINVAL); 225 } 226 } 227 } 228 fclose(f); 229 230 if (vsPreprocessTransforms(td, &tc->trans) != VS_OK) { 231 av_log(ctx, AV_LOG_ERROR, "error while preprocessing transforms\n"); 232 return AVERROR(EINVAL); 233 } 234 235 // TODO: add sharpening, so far the user needs to call the unsharp filter manually 236 return 0; 237} 238 239 240static int filter_frame(AVFilterLink *inlink, AVFrame *in) 241{ 242 AVFilterContext *ctx = inlink->dst; 243 TransformContext *tc = ctx->priv; 244 VSTransformData* td = &(tc->td); 245 246 AVFilterLink *outlink = inlink->dst->outputs[0]; 247 int direct = 0; 248 AVFrame *out; 249 VSFrame inframe; 250 int plane; 251 252 if (av_frame_is_writable(in)) { 253 direct = 1; 254 out = in; 255 } else { 256 out = ff_get_video_buffer(outlink, outlink->w, outlink->h); 257 if (!out) { 258 av_frame_free(&in); 259 return AVERROR(ENOMEM); 260 } 261 av_frame_copy_props(out, in); 262 } 263 264 for (plane = 0; plane < vsTransformGetSrcFrameInfo(td)->planes; plane++) { 265 inframe.data[plane] = in->data[plane]; 266 inframe.linesize[plane] = in->linesize[plane]; 267 } 268 if (direct) { 269 vsTransformPrepare(td, &inframe, &inframe); 270 } else { // separate frames 271 VSFrame outframe; 272 for (plane = 0; plane < vsTransformGetDestFrameInfo(td)->planes; plane++) { 273 outframe.data[plane] = out->data[plane]; 274 outframe.linesize[plane] = out->linesize[plane]; 275 } 276 vsTransformPrepare(td, &inframe, &outframe); 277 } 278 279 vsDoTransform(td, vsGetNextTransform(td, &tc->trans)); 280 281 vsTransformFinish(td); 282 283 if (!direct) 284 av_frame_free(&in); 285 286 return ff_filter_frame(outlink, out); 287} 288 289static const AVFilterPad avfilter_vf_vidstabtransform_inputs[] = { 290 { 291 .name = "default", 292 .type = AVMEDIA_TYPE_VIDEO, 293 .filter_frame = filter_frame, 294 .config_props = config_input, 295 }, 296 { NULL } 297}; 298 299static const AVFilterPad avfilter_vf_vidstabtransform_outputs[] = { 300 { 301 .name = "default", 302 .type = AVMEDIA_TYPE_VIDEO, 303 }, 304 { NULL } 305}; 306 307AVFilter ff_vf_vidstabtransform = { 308 .name = "vidstabtransform", 309 .description = NULL_IF_CONFIG_SMALL("Transform the frames, " 310 "pass 2 of 2 for stabilization " 311 "(see vidstabdetect for pass 1)."), 312 .priv_size = sizeof(TransformContext), 313 .init = init, 314 .uninit = uninit, 315 .query_formats = query_formats, 316 .inputs = avfilter_vf_vidstabtransform_inputs, 317 .outputs = avfilter_vf_vidstabtransform_outputs, 318 .priv_class = &vidstabtransform_class, 319}; 320