1/* 2 * Nellymoser encoder 3 * This code is developed as part of Google Summer of Code 2008 Program. 4 * 5 * Copyright (c) 2008 Bartlomiej Wolowiec 6 * 7 * This file is part of FFmpeg. 8 * 9 * FFmpeg is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * FFmpeg is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with FFmpeg; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24/** 25 * @file 26 * Nellymoser encoder 27 * by Bartlomiej Wolowiec 28 * 29 * Generic codec information: libavcodec/nellymoserdec.c 30 * 31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip 32 * (Copyright Joseph Artsimovich and UAB "DKD") 33 * 34 * for more information about nellymoser format, visit: 35 * http://wiki.multimedia.cx/index.php?title=Nellymoser 36 */ 37 38#include "nellymoser.h" 39#include "avcodec.h" 40#include "dsputil.h" 41#include "fft.h" 42 43#define BITSTREAM_WRITER_LE 44#include "put_bits.h" 45 46#define POW_TABLE_SIZE (1<<11) 47#define POW_TABLE_OFFSET 3 48#define OPT_SIZE ((1<<15) + 3000) 49 50typedef struct NellyMoserEncodeContext { 51 AVCodecContext *avctx; 52 int last_frame; 53 int bufsel; 54 int have_saved; 55 DSPContext dsp; 56 FFTContext mdct_ctx; 57 DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES]; 58 DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES]; 59 DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer 60 float (*opt )[NELLY_BANDS]; 61 uint8_t (*path)[NELLY_BANDS]; 62} NellyMoserEncodeContext; 63 64static float pow_table[POW_TABLE_SIZE]; ///< -pow(2, -i / 2048.0 - 3.0); 65 66static const uint8_t sf_lut[96] = { 67 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 68 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14, 69 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26, 70 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 71 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53, 72 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62, 73}; 74 75static const uint8_t sf_delta_lut[78] = { 76 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 77 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12, 78 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23, 79 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28, 80 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 81}; 82 83static const uint8_t quant_lut[230] = { 84 0, 85 86 0, 1, 2, 87 88 0, 1, 2, 3, 4, 5, 6, 89 90 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 91 12, 13, 13, 13, 14, 92 93 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 94 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 95 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29, 96 30, 97 98 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 99 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 100 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, 101 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20, 102 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, 103 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45, 104 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52, 105 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57, 106 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, 107 61, 61, 61, 61, 62, 108}; 109 110static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 }; 111static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 }; 112static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 }; 113 114static void apply_mdct(NellyMoserEncodeContext *s) 115{ 116 memcpy(s->in_buff, s->buf[s->bufsel], NELLY_BUF_LEN * sizeof(float)); 117 s->dsp.vector_fmul(s->in_buff, ff_sine_128, NELLY_BUF_LEN); 118 s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, 119 NELLY_BUF_LEN); 120 ff_mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff); 121 122 s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, NELLY_BUF_LEN); 123 s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128, 124 NELLY_BUF_LEN); 125 ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN); 126} 127 128static av_cold int encode_init(AVCodecContext *avctx) 129{ 130 NellyMoserEncodeContext *s = avctx->priv_data; 131 int i; 132 133 if (avctx->channels != 1) { 134 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n"); 135 return -1; 136 } 137 138 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 && 139 avctx->sample_rate != 11025 && 140 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 && 141 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) { 142 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n"); 143 return -1; 144 } 145 146 avctx->frame_size = NELLY_SAMPLES; 147 s->avctx = avctx; 148 ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0); 149 dsputil_init(&s->dsp, avctx); 150 151 /* Generate overlap window */ 152 ff_sine_window_init(ff_sine_128, 128); 153 for (i = 0; i < POW_TABLE_SIZE; i++) 154 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET); 155 156 if (s->avctx->trellis) { 157 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float )); 158 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t)); 159 } 160 161 return 0; 162} 163 164static av_cold int encode_end(AVCodecContext *avctx) 165{ 166 NellyMoserEncodeContext *s = avctx->priv_data; 167 168 ff_mdct_end(&s->mdct_ctx); 169 170 if (s->avctx->trellis) { 171 av_free(s->opt); 172 av_free(s->path); 173 } 174 175 return 0; 176} 177 178#define find_best(val, table, LUT, LUT_add, LUT_size) \ 179 best_idx = \ 180 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \ 181 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \ 182 best_idx++; 183 184static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table) 185{ 186 int band, best_idx, power_idx = 0; 187 float power_candidate; 188 189 //base exponent 190 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96); 191 idx_table[0] = best_idx; 192 power_idx = ff_nelly_init_table[best_idx]; 193 194 for (band = 1; band < NELLY_BANDS; band++) { 195 power_candidate = cand[band] - power_idx; 196 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78); 197 idx_table[band] = best_idx; 198 power_idx += ff_nelly_delta_table[best_idx]; 199 } 200} 201 202static inline float distance(float x, float y, int band) 203{ 204 //return pow(fabs(x-y), 2.0); 205 float tmp = x - y; 206 return tmp * tmp; 207} 208 209static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table) 210{ 211 int i, j, band, best_idx; 212 float power_candidate, best_val; 213 214 float (*opt )[NELLY_BANDS] = s->opt ; 215 uint8_t(*path)[NELLY_BANDS] = s->path; 216 217 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) { 218 opt[0][i] = INFINITY; 219 } 220 221 for (i = 0; i < 64; i++) { 222 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0); 223 path[0][ff_nelly_init_table[i]] = i; 224 } 225 226 for (band = 1; band < NELLY_BANDS; band++) { 227 int q, c = 0; 228 float tmp; 229 int idx_min, idx_max, idx; 230 power_candidate = cand[band]; 231 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) { 232 idx_min = FFMAX(0, cand[band] - q); 233 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q); 234 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) { 235 if ( isinf(opt[band - 1][i]) ) 236 continue; 237 for (j = 0; j < 32; j++) { 238 idx = i + ff_nelly_delta_table[j]; 239 if (idx > idx_max) 240 break; 241 if (idx >= idx_min) { 242 tmp = opt[band - 1][i] + distance(idx, power_candidate, band); 243 if (opt[band][idx] > tmp) { 244 opt[band][idx] = tmp; 245 path[band][idx] = j; 246 c = 1; 247 } 248 } 249 } 250 } 251 } 252 assert(c); //FIXME 253 } 254 255 best_val = INFINITY; 256 best_idx = -1; 257 band = NELLY_BANDS - 1; 258 for (i = 0; i < OPT_SIZE; i++) { 259 if (best_val > opt[band][i]) { 260 best_val = opt[band][i]; 261 best_idx = i; 262 } 263 } 264 for (band = NELLY_BANDS - 1; band >= 0; band--) { 265 idx_table[band] = path[band][best_idx]; 266 if (band) { 267 best_idx -= ff_nelly_delta_table[path[band][best_idx]]; 268 } 269 } 270} 271 272/** 273 * Encodes NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values 274 * @param s encoder context 275 * @param output output buffer 276 * @param output_size size of output buffer 277 */ 278static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size) 279{ 280 PutBitContext pb; 281 int i, j, band, block, best_idx, power_idx = 0; 282 float power_val, coeff, coeff_sum; 283 float pows[NELLY_FILL_LEN]; 284 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS]; 285 float cand[NELLY_BANDS]; 286 287 apply_mdct(s); 288 289 init_put_bits(&pb, output, output_size * 8); 290 291 i = 0; 292 for (band = 0; band < NELLY_BANDS; band++) { 293 coeff_sum = 0; 294 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { 295 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ] 296 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN]; 297 } 298 cand[band] = 299 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2; 300 } 301 302 if (s->avctx->trellis) { 303 get_exponent_dynamic(s, cand, idx_table); 304 } else { 305 get_exponent_greedy(s, cand, idx_table); 306 } 307 308 i = 0; 309 for (band = 0; band < NELLY_BANDS; band++) { 310 if (band) { 311 power_idx += ff_nelly_delta_table[idx_table[band]]; 312 put_bits(&pb, 5, idx_table[band]); 313 } else { 314 power_idx = ff_nelly_init_table[idx_table[0]]; 315 put_bits(&pb, 6, idx_table[0]); 316 } 317 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET)); 318 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { 319 s->mdct_out[i] *= power_val; 320 s->mdct_out[i + NELLY_BUF_LEN] *= power_val; 321 pows[i] = power_idx; 322 } 323 } 324 325 ff_nelly_get_sample_bits(pows, bits); 326 327 for (block = 0; block < 2; block++) { 328 for (i = 0; i < NELLY_FILL_LEN; i++) { 329 if (bits[i] > 0) { 330 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1; 331 coeff = s->mdct_out[block * NELLY_BUF_LEN + i]; 332 best_idx = 333 quant_lut[av_clip ( 334 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]], 335 quant_lut_offset[bits[i]], 336 quant_lut_offset[bits[i]+1] - 1 337 )]; 338 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1])) 339 best_idx++; 340 341 put_bits(&pb, bits[i], best_idx); 342 } 343 } 344 if (!block) 345 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0); 346 } 347 348 flush_put_bits(&pb); 349} 350 351static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data) 352{ 353 NellyMoserEncodeContext *s = avctx->priv_data; 354 int16_t *samples = data; 355 int i; 356 357 if (s->last_frame) 358 return 0; 359 360 if (data) { 361 for (i = 0; i < avctx->frame_size; i++) { 362 s->buf[s->bufsel][i] = samples[i]; 363 } 364 for (; i < NELLY_SAMPLES; i++) { 365 s->buf[s->bufsel][i] = 0; 366 } 367 s->bufsel = 1 - s->bufsel; 368 if (!s->have_saved) { 369 s->have_saved = 1; 370 return 0; 371 } 372 } else { 373 memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN); 374 s->bufsel = 1 - s->bufsel; 375 s->last_frame = 1; 376 } 377 378 if (s->have_saved) { 379 encode_block(s, frame, buf_size); 380 return NELLY_BLOCK_LEN; 381 } 382 return 0; 383} 384 385AVCodec nellymoser_encoder = { 386 .name = "nellymoser", 387 .type = AVMEDIA_TYPE_AUDIO, 388 .id = CODEC_ID_NELLYMOSER, 389 .priv_data_size = sizeof(NellyMoserEncodeContext), 390 .init = encode_init, 391 .encode = encode_frame, 392 .close = encode_end, 393 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY, 394 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"), 395}; 396