1/* 2 * Nellymoser encoder 3 * This code is developed as part of Google Summer of Code 2008 Program. 4 * 5 * Copyright (c) 2008 Bartlomiej Wolowiec 6 * 7 * This file is part of Libav. 8 * 9 * Libav is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * Libav is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with Libav; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 22 */ 23 24/** 25 * @file 26 * Nellymoser encoder 27 * by Bartlomiej Wolowiec 28 * 29 * Generic codec information: libavcodec/nellymoserdec.c 30 * 31 * Some information also from: http://samples.libav.org/A-codecs/Nelly_Moser/ASAO/ASAO.zip 32 * (Copyright Joseph Artsimovich and UAB "DKD") 33 * 34 * for more information about nellymoser format, visit: 35 * http://wiki.multimedia.cx/index.php?title=Nellymoser 36 */ 37 38#include "libavutil/mathematics.h" 39#include "nellymoser.h" 40#include "avcodec.h" 41#include "dsputil.h" 42#include "fft.h" 43#include "sinewin.h" 44 45#define BITSTREAM_WRITER_LE 46#include "put_bits.h" 47 48#define POW_TABLE_SIZE (1<<11) 49#define POW_TABLE_OFFSET 3 50#define OPT_SIZE ((1<<15) + 3000) 51 52typedef struct NellyMoserEncodeContext { 53 AVCodecContext *avctx; 54 int last_frame; 55 int bufsel; 56 int have_saved; 57 DSPContext dsp; 58 FFTContext mdct_ctx; 59 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES]; 60 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES]; 61 DECLARE_ALIGNED(32, float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer 62 float (*opt )[NELLY_BANDS]; 63 uint8_t (*path)[NELLY_BANDS]; 64} NellyMoserEncodeContext; 65 66static float pow_table[POW_TABLE_SIZE]; ///< -pow(2, -i / 2048.0 - 3.0); 67 68static const uint8_t sf_lut[96] = { 69 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 70 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14, 71 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26, 72 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 73 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53, 74 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62, 75}; 76 77static const uint8_t sf_delta_lut[78] = { 78 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 79 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12, 80 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23, 81 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28, 82 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 83}; 84 85static const uint8_t quant_lut[230] = { 86 0, 87 88 0, 1, 2, 89 90 0, 1, 2, 3, 4, 5, 6, 91 92 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 93 12, 13, 13, 13, 14, 94 95 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 96 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 97 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29, 98 30, 99 100 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 101 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 102 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, 103 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20, 104 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, 105 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45, 106 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52, 107 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57, 108 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, 109 61, 61, 61, 61, 62, 110}; 111 112static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 }; 113static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 }; 114static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 }; 115 116static void apply_mdct(NellyMoserEncodeContext *s) 117{ 118 s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN); 119 s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, 120 NELLY_BUF_LEN); 121 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff); 122 123 s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, 124 ff_sine_128, NELLY_BUF_LEN); 125 s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128, 126 NELLY_BUF_LEN); 127 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN); 128} 129 130static av_cold int encode_init(AVCodecContext *avctx) 131{ 132 NellyMoserEncodeContext *s = avctx->priv_data; 133 int i; 134 135 if (avctx->channels != 1) { 136 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n"); 137 return -1; 138 } 139 140 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 && 141 avctx->sample_rate != 11025 && 142 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 && 143 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) { 144 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n"); 145 return -1; 146 } 147 148 avctx->frame_size = NELLY_SAMPLES; 149 s->avctx = avctx; 150 ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0); 151 dsputil_init(&s->dsp, avctx); 152 153 /* Generate overlap window */ 154 ff_sine_window_init(ff_sine_128, 128); 155 for (i = 0; i < POW_TABLE_SIZE; i++) 156 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET); 157 158 if (s->avctx->trellis) { 159 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float )); 160 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t)); 161 } 162 163 return 0; 164} 165 166static av_cold int encode_end(AVCodecContext *avctx) 167{ 168 NellyMoserEncodeContext *s = avctx->priv_data; 169 170 ff_mdct_end(&s->mdct_ctx); 171 172 if (s->avctx->trellis) { 173 av_free(s->opt); 174 av_free(s->path); 175 } 176 177 return 0; 178} 179 180#define find_best(val, table, LUT, LUT_add, LUT_size) \ 181 best_idx = \ 182 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \ 183 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \ 184 best_idx++; 185 186static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table) 187{ 188 int band, best_idx, power_idx = 0; 189 float power_candidate; 190 191 //base exponent 192 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96); 193 idx_table[0] = best_idx; 194 power_idx = ff_nelly_init_table[best_idx]; 195 196 for (band = 1; band < NELLY_BANDS; band++) { 197 power_candidate = cand[band] - power_idx; 198 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78); 199 idx_table[band] = best_idx; 200 power_idx += ff_nelly_delta_table[best_idx]; 201 } 202} 203 204static inline float distance(float x, float y, int band) 205{ 206 //return pow(fabs(x-y), 2.0); 207 float tmp = x - y; 208 return tmp * tmp; 209} 210 211static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table) 212{ 213 int i, j, band, best_idx; 214 float power_candidate, best_val; 215 216 float (*opt )[NELLY_BANDS] = s->opt ; 217 uint8_t(*path)[NELLY_BANDS] = s->path; 218 219 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) { 220 opt[0][i] = INFINITY; 221 } 222 223 for (i = 0; i < 64; i++) { 224 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0); 225 path[0][ff_nelly_init_table[i]] = i; 226 } 227 228 for (band = 1; band < NELLY_BANDS; band++) { 229 int q, c = 0; 230 float tmp; 231 int idx_min, idx_max, idx; 232 power_candidate = cand[band]; 233 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) { 234 idx_min = FFMAX(0, cand[band] - q); 235 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q); 236 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) { 237 if ( isinf(opt[band - 1][i]) ) 238 continue; 239 for (j = 0; j < 32; j++) { 240 idx = i + ff_nelly_delta_table[j]; 241 if (idx > idx_max) 242 break; 243 if (idx >= idx_min) { 244 tmp = opt[band - 1][i] + distance(idx, power_candidate, band); 245 if (opt[band][idx] > tmp) { 246 opt[band][idx] = tmp; 247 path[band][idx] = j; 248 c = 1; 249 } 250 } 251 } 252 } 253 } 254 assert(c); //FIXME 255 } 256 257 best_val = INFINITY; 258 best_idx = -1; 259 band = NELLY_BANDS - 1; 260 for (i = 0; i < OPT_SIZE; i++) { 261 if (best_val > opt[band][i]) { 262 best_val = opt[band][i]; 263 best_idx = i; 264 } 265 } 266 for (band = NELLY_BANDS - 1; band >= 0; band--) { 267 idx_table[band] = path[band][best_idx]; 268 if (band) { 269 best_idx -= ff_nelly_delta_table[path[band][best_idx]]; 270 } 271 } 272} 273 274/** 275 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values 276 * @param s encoder context 277 * @param output output buffer 278 * @param output_size size of output buffer 279 */ 280static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size) 281{ 282 PutBitContext pb; 283 int i, j, band, block, best_idx, power_idx = 0; 284 float power_val, coeff, coeff_sum; 285 float pows[NELLY_FILL_LEN]; 286 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS]; 287 float cand[NELLY_BANDS]; 288 289 apply_mdct(s); 290 291 init_put_bits(&pb, output, output_size * 8); 292 293 i = 0; 294 for (band = 0; band < NELLY_BANDS; band++) { 295 coeff_sum = 0; 296 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { 297 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ] 298 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN]; 299 } 300 cand[band] = 301 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2; 302 } 303 304 if (s->avctx->trellis) { 305 get_exponent_dynamic(s, cand, idx_table); 306 } else { 307 get_exponent_greedy(s, cand, idx_table); 308 } 309 310 i = 0; 311 for (band = 0; band < NELLY_BANDS; band++) { 312 if (band) { 313 power_idx += ff_nelly_delta_table[idx_table[band]]; 314 put_bits(&pb, 5, idx_table[band]); 315 } else { 316 power_idx = ff_nelly_init_table[idx_table[0]]; 317 put_bits(&pb, 6, idx_table[0]); 318 } 319 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET)); 320 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { 321 s->mdct_out[i] *= power_val; 322 s->mdct_out[i + NELLY_BUF_LEN] *= power_val; 323 pows[i] = power_idx; 324 } 325 } 326 327 ff_nelly_get_sample_bits(pows, bits); 328 329 for (block = 0; block < 2; block++) { 330 for (i = 0; i < NELLY_FILL_LEN; i++) { 331 if (bits[i] > 0) { 332 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1; 333 coeff = s->mdct_out[block * NELLY_BUF_LEN + i]; 334 best_idx = 335 quant_lut[av_clip ( 336 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]], 337 quant_lut_offset[bits[i]], 338 quant_lut_offset[bits[i]+1] - 1 339 )]; 340 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1])) 341 best_idx++; 342 343 put_bits(&pb, bits[i], best_idx); 344 } 345 } 346 if (!block) 347 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0); 348 } 349 350 flush_put_bits(&pb); 351} 352 353static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data) 354{ 355 NellyMoserEncodeContext *s = avctx->priv_data; 356 const float *samples = data; 357 int i; 358 359 if (s->last_frame) 360 return 0; 361 362 if (data) { 363 memcpy(s->buf[s->bufsel], samples, avctx->frame_size * sizeof(*samples)); 364 for (i = avctx->frame_size; i < NELLY_SAMPLES; i++) { 365 s->buf[s->bufsel][i] = 0; 366 } 367 s->bufsel = 1 - s->bufsel; 368 if (!s->have_saved) { 369 s->have_saved = 1; 370 return 0; 371 } 372 } else { 373 memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN); 374 s->bufsel = 1 - s->bufsel; 375 s->last_frame = 1; 376 } 377 378 if (s->have_saved) { 379 encode_block(s, frame, buf_size); 380 return NELLY_BLOCK_LEN; 381 } 382 return 0; 383} 384 385AVCodec ff_nellymoser_encoder = { 386 .name = "nellymoser", 387 .type = AVMEDIA_TYPE_AUDIO, 388 .id = CODEC_ID_NELLYMOSER, 389 .priv_data_size = sizeof(NellyMoserEncodeContext), 390 .init = encode_init, 391 .encode = encode_frame, 392 .close = encode_end, 393 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY, 394 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"), 395 .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_NONE}, 396}; 397