1/* 2 * Nellymoser encoder 3 * This code is developed as part of Google Summer of Code 2008 Program. 4 * 5 * Copyright (c) 2008 Bartlomiej Wolowiec 6 * 7 * This file is part of FFmpeg. 8 * 9 * FFmpeg is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * FFmpeg is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with FFmpeg; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24/** 25 * @file 26 * Nellymoser encoder 27 * by Bartlomiej Wolowiec 28 * 29 * Generic codec information: libavcodec/nellymoserdec.c 30 * 31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip 32 * (Copyright Joseph Artsimovich and UAB "DKD") 33 * 34 * for more information about nellymoser format, visit: 35 * http://wiki.multimedia.cx/index.php?title=Nellymoser 36 */ 37 38#include "libavutil/common.h" 39#include "libavutil/float_dsp.h" 40#include "libavutil/mathematics.h" 41 42#include "audio_frame_queue.h" 43#include "avcodec.h" 44#include "fft.h" 45#include "internal.h" 46#include "nellymoser.h" 47#include "sinewin.h" 48 49#define BITSTREAM_WRITER_LE 50#include "put_bits.h" 51 52#define POW_TABLE_SIZE (1<<11) 53#define POW_TABLE_OFFSET 3 54#define OPT_SIZE ((1<<15) + 3000) 55 56typedef struct NellyMoserEncodeContext { 57 AVCodecContext *avctx; 58 int last_frame; 59 AVFloatDSPContext fdsp; 60 FFTContext mdct_ctx; 61 AudioFrameQueue afq; 62 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES]; 63 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES]; 64 DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer 65 float (*opt )[OPT_SIZE]; 66 uint8_t (*path)[OPT_SIZE]; 67} NellyMoserEncodeContext; 68 69static float pow_table[POW_TABLE_SIZE]; ///< -pow(2, -i / 2048.0 - 3.0); 70 71static const uint8_t sf_lut[96] = { 72 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 73 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14, 74 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26, 75 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 76 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53, 77 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62, 78}; 79 80static const uint8_t sf_delta_lut[78] = { 81 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 82 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12, 83 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23, 84 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28, 85 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 86}; 87 88static const uint8_t quant_lut[230] = { 89 0, 90 91 0, 1, 2, 92 93 0, 1, 2, 3, 4, 5, 6, 94 95 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 96 12, 13, 13, 13, 14, 97 98 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 99 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 100 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29, 101 30, 102 103 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 104 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 105 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, 106 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20, 107 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, 108 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45, 109 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52, 110 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57, 111 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, 112 61, 61, 61, 61, 62, 113}; 114 115static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 }; 116static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 }; 117static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 }; 118 119static void apply_mdct(NellyMoserEncodeContext *s) 120{ 121 float *in0 = s->buf; 122 float *in1 = s->buf + NELLY_BUF_LEN; 123 float *in2 = s->buf + 2 * NELLY_BUF_LEN; 124 125 s->fdsp.vector_fmul (s->in_buff, in0, ff_sine_128, NELLY_BUF_LEN); 126 s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN); 127 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff); 128 129 s->fdsp.vector_fmul (s->in_buff, in1, ff_sine_128, NELLY_BUF_LEN); 130 s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN); 131 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff); 132} 133 134static av_cold int encode_end(AVCodecContext *avctx) 135{ 136 NellyMoserEncodeContext *s = avctx->priv_data; 137 138 ff_mdct_end(&s->mdct_ctx); 139 140 if (s->avctx->trellis) { 141 av_free(s->opt); 142 av_free(s->path); 143 } 144 ff_af_queue_close(&s->afq); 145 146 return 0; 147} 148 149static av_cold int encode_init(AVCodecContext *avctx) 150{ 151 NellyMoserEncodeContext *s = avctx->priv_data; 152 int i, ret; 153 154 if (avctx->channels != 1) { 155 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n"); 156 return AVERROR(EINVAL); 157 } 158 159 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 && 160 avctx->sample_rate != 11025 && 161 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 && 162 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) { 163 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n"); 164 return AVERROR(EINVAL); 165 } 166 167 avctx->frame_size = NELLY_SAMPLES; 168 avctx->delay = NELLY_BUF_LEN; 169 ff_af_queue_init(avctx, &s->afq); 170 s->avctx = avctx; 171 if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0) 172 goto error; 173 avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); 174 175 /* Generate overlap window */ 176 ff_init_ff_sine_windows(7); 177 for (i = 0; i < POW_TABLE_SIZE; i++) 178 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET); 179 180 if (s->avctx->trellis) { 181 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float )); 182 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t)); 183 if (!s->opt || !s->path) { 184 ret = AVERROR(ENOMEM); 185 goto error; 186 } 187 } 188 189 return 0; 190error: 191 encode_end(avctx); 192 return ret; 193} 194 195#define find_best(val, table, LUT, LUT_add, LUT_size) \ 196 best_idx = \ 197 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \ 198 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \ 199 best_idx++; 200 201static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table) 202{ 203 int band, best_idx, power_idx = 0; 204 float power_candidate; 205 206 //base exponent 207 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96); 208 idx_table[0] = best_idx; 209 power_idx = ff_nelly_init_table[best_idx]; 210 211 for (band = 1; band < NELLY_BANDS; band++) { 212 power_candidate = cand[band] - power_idx; 213 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78); 214 idx_table[band] = best_idx; 215 power_idx += ff_nelly_delta_table[best_idx]; 216 } 217} 218 219static inline float distance(float x, float y, int band) 220{ 221 //return pow(fabs(x-y), 2.0); 222 float tmp = x - y; 223 return tmp * tmp; 224} 225 226static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table) 227{ 228 int i, j, band, best_idx; 229 float power_candidate, best_val; 230 231 float (*opt )[OPT_SIZE] = s->opt ; 232 uint8_t(*path)[OPT_SIZE] = s->path; 233 234 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) { 235 opt[0][i] = INFINITY; 236 } 237 238 for (i = 0; i < 64; i++) { 239 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0); 240 path[0][ff_nelly_init_table[i]] = i; 241 } 242 243 for (band = 1; band < NELLY_BANDS; band++) { 244 int q, c = 0; 245 float tmp; 246 int idx_min, idx_max, idx; 247 power_candidate = cand[band]; 248 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) { 249 idx_min = FFMAX(0, cand[band] - q); 250 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q); 251 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) { 252 if ( isinf(opt[band - 1][i]) ) 253 continue; 254 for (j = 0; j < 32; j++) { 255 idx = i + ff_nelly_delta_table[j]; 256 if (idx > idx_max) 257 break; 258 if (idx >= idx_min) { 259 tmp = opt[band - 1][i] + distance(idx, power_candidate, band); 260 if (opt[band][idx] > tmp) { 261 opt[band][idx] = tmp; 262 path[band][idx] = j; 263 c = 1; 264 } 265 } 266 } 267 } 268 } 269 assert(c); //FIXME 270 } 271 272 best_val = INFINITY; 273 best_idx = -1; 274 band = NELLY_BANDS - 1; 275 for (i = 0; i < OPT_SIZE; i++) { 276 if (best_val > opt[band][i]) { 277 best_val = opt[band][i]; 278 best_idx = i; 279 } 280 } 281 for (band = NELLY_BANDS - 1; band >= 0; band--) { 282 idx_table[band] = path[band][best_idx]; 283 if (band) { 284 best_idx -= ff_nelly_delta_table[path[band][best_idx]]; 285 } 286 } 287} 288 289/** 290 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values 291 * @param s encoder context 292 * @param output output buffer 293 * @param output_size size of output buffer 294 */ 295static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size) 296{ 297 PutBitContext pb; 298 int i, j, band, block, best_idx, power_idx = 0; 299 float power_val, coeff, coeff_sum; 300 float pows[NELLY_FILL_LEN]; 301 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS]; 302 float cand[NELLY_BANDS]; 303 304 apply_mdct(s); 305 306 init_put_bits(&pb, output, output_size * 8); 307 308 i = 0; 309 for (band = 0; band < NELLY_BANDS; band++) { 310 coeff_sum = 0; 311 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { 312 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ] 313 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN]; 314 } 315 cand[band] = 316 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2; 317 } 318 319 if (s->avctx->trellis) { 320 get_exponent_dynamic(s, cand, idx_table); 321 } else { 322 get_exponent_greedy(s, cand, idx_table); 323 } 324 325 i = 0; 326 for (band = 0; band < NELLY_BANDS; band++) { 327 if (band) { 328 power_idx += ff_nelly_delta_table[idx_table[band]]; 329 put_bits(&pb, 5, idx_table[band]); 330 } else { 331 power_idx = ff_nelly_init_table[idx_table[0]]; 332 put_bits(&pb, 6, idx_table[0]); 333 } 334 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET)); 335 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { 336 s->mdct_out[i] *= power_val; 337 s->mdct_out[i + NELLY_BUF_LEN] *= power_val; 338 pows[i] = power_idx; 339 } 340 } 341 342 ff_nelly_get_sample_bits(pows, bits); 343 344 for (block = 0; block < 2; block++) { 345 for (i = 0; i < NELLY_FILL_LEN; i++) { 346 if (bits[i] > 0) { 347 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1; 348 coeff = s->mdct_out[block * NELLY_BUF_LEN + i]; 349 best_idx = 350 quant_lut[av_clip ( 351 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]], 352 quant_lut_offset[bits[i]], 353 quant_lut_offset[bits[i]+1] - 1 354 )]; 355 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1])) 356 best_idx++; 357 358 put_bits(&pb, bits[i], best_idx); 359 } 360 } 361 if (!block) 362 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0); 363 } 364 365 flush_put_bits(&pb); 366 memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb)); 367} 368 369static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt, 370 const AVFrame *frame, int *got_packet_ptr) 371{ 372 NellyMoserEncodeContext *s = avctx->priv_data; 373 int ret; 374 375 if (s->last_frame) 376 return 0; 377 378 memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf)); 379 if (frame) { 380 memcpy(s->buf + NELLY_BUF_LEN, frame->data[0], 381 frame->nb_samples * sizeof(*s->buf)); 382 if (frame->nb_samples < NELLY_SAMPLES) { 383 memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0, 384 (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf)); 385 if (frame->nb_samples >= NELLY_BUF_LEN) 386 s->last_frame = 1; 387 } 388 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0) 389 return ret; 390 } else { 391 memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf)); 392 s->last_frame = 1; 393 } 394 395 if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)) < 0) 396 return ret; 397 encode_block(s, avpkt->data, avpkt->size); 398 399 /* Get the next frame pts/duration */ 400 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts, 401 &avpkt->duration); 402 403 *got_packet_ptr = 1; 404 return 0; 405} 406 407AVCodec ff_nellymoser_encoder = { 408 .name = "nellymoser", 409 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"), 410 .type = AVMEDIA_TYPE_AUDIO, 411 .id = AV_CODEC_ID_NELLYMOSER, 412 .priv_data_size = sizeof(NellyMoserEncodeContext), 413 .init = encode_init, 414 .encode2 = encode_frame, 415 .close = encode_end, 416 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY, 417 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT, 418 AV_SAMPLE_FMT_NONE }, 419}; 420