1/* 2 * ALAC (Apple Lossless Audio Codec) decoder 3 * Copyright (c) 2005 David Hammerton 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file libavcodec/alac.c 24 * ALAC (Apple Lossless Audio Codec) decoder 25 * @author 2005 David Hammerton 26 * 27 * For more information on the ALAC format, visit: 28 * http://crazney.net/programs/itunes/alac.html 29 * 30 * Note: This decoder expects a 36- (0x24-)byte QuickTime atom to be 31 * passed through the extradata[_size] fields. This atom is tacked onto 32 * the end of an 'alac' stsd atom and has the following format: 33 * bytes 0-3 atom size (0x24), big-endian 34 * bytes 4-7 atom type ('alac', not the 'alac' tag from start of stsd) 35 * bytes 8-35 data bytes needed by decoder 36 * 37 * Extradata: 38 * 32bit size 39 * 32bit tag (=alac) 40 * 32bit zero? 41 * 32bit max sample per frame 42 * 8bit ?? (zero?) 43 * 8bit sample size 44 * 8bit history mult 45 * 8bit initial history 46 * 8bit kmodifier 47 * 8bit channels? 48 * 16bit ?? 49 * 32bit max coded frame size 50 * 32bit bitrate? 51 * 32bit samplerate 52 */ 53 54 55#include "avcodec.h" 56#include "bitstream.h" 57#include "bytestream.h" 58#include "unary.h" 59 60#define ALAC_EXTRADATA_SIZE 36 61#define MAX_CHANNELS 2 62 63typedef struct { 64 65 AVCodecContext *avctx; 66 GetBitContext gb; 67 /* init to 0; first frame decode should initialize from extradata and 68 * set this to 1 */ 69 int context_initialized; 70 71 int numchannels; 72 int bytespersample; 73 74 /* buffers */ 75 int32_t *predicterror_buffer[MAX_CHANNELS]; 76 77 int32_t *outputsamples_buffer[MAX_CHANNELS]; 78 79 /* stuff from setinfo */ 80 uint32_t setinfo_max_samples_per_frame; /* 0x1000 = 4096 */ /* max samples per frame? */ 81 uint8_t setinfo_sample_size; /* 0x10 */ 82 uint8_t setinfo_rice_historymult; /* 0x28 */ 83 uint8_t setinfo_rice_initialhistory; /* 0x0a */ 84 uint8_t setinfo_rice_kmodifier; /* 0x0e */ 85 /* end setinfo stuff */ 86 87} ALACContext; 88 89static void allocate_buffers(ALACContext *alac) 90{ 91 int chan; 92 for (chan = 0; chan < MAX_CHANNELS; chan++) { 93 alac->predicterror_buffer[chan] = 94 av_malloc(alac->setinfo_max_samples_per_frame * 4); 95 96 alac->outputsamples_buffer[chan] = 97 av_malloc(alac->setinfo_max_samples_per_frame * 4); 98 } 99} 100 101static int alac_set_info(ALACContext *alac) 102{ 103 const unsigned char *ptr = alac->avctx->extradata; 104 105 ptr += 4; /* size */ 106 ptr += 4; /* alac */ 107 ptr += 4; /* 0 ? */ 108 109 if(AV_RB32(ptr) >= UINT_MAX/4){ 110 av_log(alac->avctx, AV_LOG_ERROR, "setinfo_max_samples_per_frame too large\n"); 111 return -1; 112 } 113 114 /* buffer size / 2 ? */ 115 alac->setinfo_max_samples_per_frame = bytestream_get_be32(&ptr); 116 ptr++; /* ??? */ 117 alac->setinfo_sample_size = *ptr++; 118 if (alac->setinfo_sample_size > 32) { 119 av_log(alac->avctx, AV_LOG_ERROR, "setinfo_sample_size too large\n"); 120 return -1; 121 } 122 alac->setinfo_rice_historymult = *ptr++; 123 alac->setinfo_rice_initialhistory = *ptr++; 124 alac->setinfo_rice_kmodifier = *ptr++; 125 ptr++; /* channels? */ 126 bytestream_get_be16(&ptr); /* ??? */ 127 bytestream_get_be32(&ptr); /* max coded frame size */ 128 bytestream_get_be32(&ptr); /* bitrate ? */ 129 bytestream_get_be32(&ptr); /* samplerate */ 130 131 allocate_buffers(alac); 132 133 return 0; 134} 135 136static inline int decode_scalar(GetBitContext *gb, int k, int limit, int readsamplesize){ 137 /* read x - number of 1s before 0 represent the rice */ 138 int x = get_unary_0_9(gb); 139 140 if (x > 8) { /* RICE THRESHOLD */ 141 /* use alternative encoding */ 142 x = get_bits(gb, readsamplesize); 143 } else { 144 if (k >= limit) 145 k = limit; 146 147 if (k != 1) { 148 int extrabits = show_bits(gb, k); 149 150 /* multiply x by 2^k - 1, as part of their strange algorithm */ 151 x = (x << k) - x; 152 153 if (extrabits > 1) { 154 x += extrabits - 1; 155 skip_bits(gb, k); 156 } else 157 skip_bits(gb, k - 1); 158 } 159 } 160 return x; 161} 162 163static void bastardized_rice_decompress(ALACContext *alac, 164 int32_t *output_buffer, 165 int output_size, 166 int readsamplesize, /* arg_10 */ 167 int rice_initialhistory, /* arg424->b */ 168 int rice_kmodifier, /* arg424->d */ 169 int rice_historymult, /* arg424->c */ 170 int rice_kmodifier_mask /* arg424->e */ 171 ) 172{ 173 int output_count; 174 unsigned int history = rice_initialhistory; 175 int sign_modifier = 0; 176 177 for (output_count = 0; output_count < output_size; output_count++) { 178 int32_t x; 179 int32_t x_modified; 180 int32_t final_val; 181 182 /* standard rice encoding */ 183 int k; /* size of extra bits */ 184 185 /* read k, that is bits as is */ 186 k = av_log2((history >> 9) + 3); 187 x= decode_scalar(&alac->gb, k, rice_kmodifier, readsamplesize); 188 189 x_modified = sign_modifier + x; 190 final_val = (x_modified + 1) / 2; 191 if (x_modified & 1) final_val *= -1; 192 193 output_buffer[output_count] = final_val; 194 195 sign_modifier = 0; 196 197 /* now update the history */ 198 history += x_modified * rice_historymult 199 - ((history * rice_historymult) >> 9); 200 201 if (x_modified > 0xffff) 202 history = 0xffff; 203 204 /* special case: there may be compressed blocks of 0 */ 205 if ((history < 128) && (output_count+1 < output_size)) { 206 int k; 207 unsigned int block_size; 208 209 sign_modifier = 1; 210 211 k = 7 - av_log2(history) + ((history + 16) >> 6 /* / 64 */); 212 213 block_size= decode_scalar(&alac->gb, k, rice_kmodifier, 16); 214 215 if (block_size > 0) { 216 if(block_size >= output_size - output_count){ 217 av_log(alac->avctx, AV_LOG_ERROR, "invalid zero block size of %d %d %d\n", block_size, output_size, output_count); 218 block_size= output_size - output_count - 1; 219 } 220 memset(&output_buffer[output_count+1], 0, block_size * 4); 221 output_count += block_size; 222 } 223 224 if (block_size > 0xffff) 225 sign_modifier = 0; 226 227 history = 0; 228 } 229 } 230} 231 232static inline int32_t extend_sign32(int32_t val, int bits) 233{ 234 return (val << (32 - bits)) >> (32 - bits); 235} 236 237static inline int sign_only(int v) 238{ 239 return v ? FFSIGN(v) : 0; 240} 241 242static void predictor_decompress_fir_adapt(int32_t *error_buffer, 243 int32_t *buffer_out, 244 int output_size, 245 int readsamplesize, 246 int16_t *predictor_coef_table, 247 int predictor_coef_num, 248 int predictor_quantitization) 249{ 250 int i; 251 252 /* first sample always copies */ 253 *buffer_out = *error_buffer; 254 255 if (!predictor_coef_num) { 256 if (output_size <= 1) 257 return; 258 259 memcpy(buffer_out+1, error_buffer+1, (output_size-1) * 4); 260 return; 261 } 262 263 if (predictor_coef_num == 0x1f) { /* 11111 - max value of predictor_coef_num */ 264 /* second-best case scenario for fir decompression, 265 * error describes a small difference from the previous sample only 266 */ 267 if (output_size <= 1) 268 return; 269 for (i = 0; i < output_size - 1; i++) { 270 int32_t prev_value; 271 int32_t error_value; 272 273 prev_value = buffer_out[i]; 274 error_value = error_buffer[i+1]; 275 buffer_out[i+1] = 276 extend_sign32((prev_value + error_value), readsamplesize); 277 } 278 return; 279 } 280 281 /* read warm-up samples */ 282 if (predictor_coef_num > 0) 283 for (i = 0; i < predictor_coef_num; i++) { 284 int32_t val; 285 286 val = buffer_out[i] + error_buffer[i+1]; 287 val = extend_sign32(val, readsamplesize); 288 buffer_out[i+1] = val; 289 } 290 291#if 0 292 /* 4 and 8 are very common cases (the only ones i've seen). these 293 * should be unrolled and optimized 294 */ 295 if (predictor_coef_num == 4) { 296 /* FIXME: optimized general case */ 297 return; 298 } 299 300 if (predictor_coef_table == 8) { 301 /* FIXME: optimized general case */ 302 return; 303 } 304#endif 305 306 /* general case */ 307 if (predictor_coef_num > 0) { 308 for (i = predictor_coef_num + 1; i < output_size; i++) { 309 int j; 310 int sum = 0; 311 int outval; 312 int error_val = error_buffer[i]; 313 314 for (j = 0; j < predictor_coef_num; j++) { 315 sum += (buffer_out[predictor_coef_num-j] - buffer_out[0]) * 316 predictor_coef_table[j]; 317 } 318 319 outval = (1 << (predictor_quantitization-1)) + sum; 320 outval = outval >> predictor_quantitization; 321 outval = outval + buffer_out[0] + error_val; 322 outval = extend_sign32(outval, readsamplesize); 323 324 buffer_out[predictor_coef_num+1] = outval; 325 326 if (error_val > 0) { 327 int predictor_num = predictor_coef_num - 1; 328 329 while (predictor_num >= 0 && error_val > 0) { 330 int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num]; 331 int sign = sign_only(val); 332 333 predictor_coef_table[predictor_num] -= sign; 334 335 val *= sign; /* absolute value */ 336 337 error_val -= ((val >> predictor_quantitization) * 338 (predictor_coef_num - predictor_num)); 339 340 predictor_num--; 341 } 342 } else if (error_val < 0) { 343 int predictor_num = predictor_coef_num - 1; 344 345 while (predictor_num >= 0 && error_val < 0) { 346 int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num]; 347 int sign = - sign_only(val); 348 349 predictor_coef_table[predictor_num] -= sign; 350 351 val *= sign; /* neg value */ 352 353 error_val -= ((val >> predictor_quantitization) * 354 (predictor_coef_num - predictor_num)); 355 356 predictor_num--; 357 } 358 } 359 360 buffer_out++; 361 } 362 } 363} 364 365static void reconstruct_stereo_16(int32_t *buffer[MAX_CHANNELS], 366 int16_t *buffer_out, 367 int numchannels, int numsamples, 368 uint8_t interlacing_shift, 369 uint8_t interlacing_leftweight) 370{ 371 int i; 372 if (numsamples <= 0) 373 return; 374 375 /* weighted interlacing */ 376 if (interlacing_leftweight) { 377 for (i = 0; i < numsamples; i++) { 378 int32_t a, b; 379 380 a = buffer[0][i]; 381 b = buffer[1][i]; 382 383 a -= (b * interlacing_leftweight) >> interlacing_shift; 384 b += a; 385 386 buffer_out[i*numchannels] = b; 387 buffer_out[i*numchannels + 1] = a; 388 } 389 390 return; 391 } 392 393 /* otherwise basic interlacing took place */ 394 for (i = 0; i < numsamples; i++) { 395 int16_t left, right; 396 397 left = buffer[0][i]; 398 right = buffer[1][i]; 399 400 buffer_out[i*numchannels] = left; 401 buffer_out[i*numchannels + 1] = right; 402 } 403} 404 405static int alac_decode_frame(AVCodecContext *avctx, 406 void *outbuffer, int *outputsize, 407 const uint8_t *inbuffer, int input_buffer_size) 408{ 409 ALACContext *alac = avctx->priv_data; 410 411 int channels; 412 unsigned int outputsamples; 413 int hassize; 414 unsigned int readsamplesize; 415 int wasted_bytes; 416 int isnotcompressed; 417 uint8_t interlacing_shift; 418 uint8_t interlacing_leftweight; 419 420 /* short-circuit null buffers */ 421 if (!inbuffer || !input_buffer_size) 422 return input_buffer_size; 423 424 /* initialize from the extradata */ 425 if (!alac->context_initialized) { 426 if (alac->avctx->extradata_size != ALAC_EXTRADATA_SIZE) { 427 av_log(avctx, AV_LOG_ERROR, "alac: expected %d extradata bytes\n", 428 ALAC_EXTRADATA_SIZE); 429 return input_buffer_size; 430 } 431 if (alac_set_info(alac)) { 432 av_log(avctx, AV_LOG_ERROR, "alac: set_info failed\n"); 433 return input_buffer_size; 434 } 435 alac->context_initialized = 1; 436 } 437 438 init_get_bits(&alac->gb, inbuffer, input_buffer_size * 8); 439 440 channels = get_bits(&alac->gb, 3) + 1; 441 if (channels > MAX_CHANNELS) { 442 av_log(avctx, AV_LOG_ERROR, "channels > %d not supported\n", 443 MAX_CHANNELS); 444 return input_buffer_size; 445 } 446 447 /* 2^result = something to do with output waiting. 448 * perhaps matters if we read > 1 frame in a pass? 449 */ 450 skip_bits(&alac->gb, 4); 451 452 skip_bits(&alac->gb, 12); /* unknown, skip 12 bits */ 453 454 /* the output sample size is stored soon */ 455 hassize = get_bits1(&alac->gb); 456 457 wasted_bytes = get_bits(&alac->gb, 2); /* unknown ? */ 458 459 /* whether the frame is compressed */ 460 isnotcompressed = get_bits1(&alac->gb); 461 462 if (hassize) { 463 /* now read the number of samples as a 32bit integer */ 464 outputsamples = get_bits_long(&alac->gb, 32); 465 if(outputsamples > alac->setinfo_max_samples_per_frame){ 466 av_log(avctx, AV_LOG_ERROR, "outputsamples %d > %d\n", outputsamples, alac->setinfo_max_samples_per_frame); 467 return -1; 468 } 469 } else 470 outputsamples = alac->setinfo_max_samples_per_frame; 471 472 if(outputsamples > *outputsize / alac->bytespersample){ 473 av_log(avctx, AV_LOG_ERROR, "sample buffer too small\n"); 474 return -1; 475 } 476 477 *outputsize = outputsamples * alac->bytespersample; 478 readsamplesize = alac->setinfo_sample_size - (wasted_bytes * 8) + channels - 1; 479 if (readsamplesize > MIN_CACHE_BITS) { 480 av_log(avctx, AV_LOG_ERROR, "readsamplesize too big (%d)\n", readsamplesize); 481 return -1; 482 } 483 484 if (!isnotcompressed) { 485 /* so it is compressed */ 486 int16_t predictor_coef_table[channels][32]; 487 int predictor_coef_num[channels]; 488 int prediction_type[channels]; 489 int prediction_quantitization[channels]; 490 int ricemodifier[channels]; 491 int i, chan; 492 493 interlacing_shift = get_bits(&alac->gb, 8); 494 interlacing_leftweight = get_bits(&alac->gb, 8); 495 496 for (chan = 0; chan < channels; chan++) { 497 prediction_type[chan] = get_bits(&alac->gb, 4); 498 prediction_quantitization[chan] = get_bits(&alac->gb, 4); 499 500 ricemodifier[chan] = get_bits(&alac->gb, 3); 501 predictor_coef_num[chan] = get_bits(&alac->gb, 5); 502 503 /* read the predictor table */ 504 for (i = 0; i < predictor_coef_num[chan]; i++) 505 predictor_coef_table[chan][i] = (int16_t)get_bits(&alac->gb, 16); 506 } 507 508 if (wasted_bytes) 509 av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented, unhandling of wasted_bytes\n"); 510 511 for (chan = 0; chan < channels; chan++) { 512 bastardized_rice_decompress(alac, 513 alac->predicterror_buffer[chan], 514 outputsamples, 515 readsamplesize, 516 alac->setinfo_rice_initialhistory, 517 alac->setinfo_rice_kmodifier, 518 ricemodifier[chan] * alac->setinfo_rice_historymult / 4, 519 (1 << alac->setinfo_rice_kmodifier) - 1); 520 521 if (prediction_type[chan] == 0) { 522 /* adaptive fir */ 523 predictor_decompress_fir_adapt(alac->predicterror_buffer[chan], 524 alac->outputsamples_buffer[chan], 525 outputsamples, 526 readsamplesize, 527 predictor_coef_table[chan], 528 predictor_coef_num[chan], 529 prediction_quantitization[chan]); 530 } else { 531 av_log(avctx, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type[chan]); 532 /* I think the only other prediction type (or perhaps this is 533 * just a boolean?) runs adaptive fir twice.. like: 534 * predictor_decompress_fir_adapt(predictor_error, tempout, ...) 535 * predictor_decompress_fir_adapt(predictor_error, outputsamples ...) 536 * little strange.. 537 */ 538 } 539 } 540 } else { 541 /* not compressed, easy case */ 542 int i, chan; 543 for (i = 0; i < outputsamples; i++) 544 for (chan = 0; chan < channels; chan++) { 545 int32_t audiobits; 546 547 audiobits = get_bits_long(&alac->gb, alac->setinfo_sample_size); 548 audiobits = extend_sign32(audiobits, alac->setinfo_sample_size); 549 550 alac->outputsamples_buffer[chan][i] = audiobits; 551 } 552 /* wasted_bytes = 0; */ 553 interlacing_shift = 0; 554 interlacing_leftweight = 0; 555 } 556 if (get_bits(&alac->gb, 3) != 7) 557 av_log(avctx, AV_LOG_ERROR, "Error : Wrong End Of Frame\n"); 558 559 switch(alac->setinfo_sample_size) { 560 case 16: 561 if (channels == 2) { 562 reconstruct_stereo_16(alac->outputsamples_buffer, 563 (int16_t*)outbuffer, 564 alac->numchannels, 565 outputsamples, 566 interlacing_shift, 567 interlacing_leftweight); 568 } else { 569 int i; 570 for (i = 0; i < outputsamples; i++) { 571 int16_t sample = alac->outputsamples_buffer[0][i]; 572 ((int16_t*)outbuffer)[i * alac->numchannels] = sample; 573 } 574 } 575 break; 576 case 20: 577 case 24: 578 // It is not clear if there exist any encoder that creates 24 bit ALAC 579 // files. iTunes convert 24 bit raw files to 16 bit before encoding. 580 case 32: 581 av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented sample size %i\n", alac->setinfo_sample_size); 582 break; 583 default: 584 break; 585 } 586 587 if (input_buffer_size * 8 - get_bits_count(&alac->gb) > 8) 588 av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n", input_buffer_size * 8 - get_bits_count(&alac->gb)); 589 590 return input_buffer_size; 591} 592 593static av_cold int alac_decode_init(AVCodecContext * avctx) 594{ 595 ALACContext *alac = avctx->priv_data; 596 alac->avctx = avctx; 597 alac->context_initialized = 0; 598 599 alac->numchannels = alac->avctx->channels; 600 alac->bytespersample = 2 * alac->numchannels; 601 avctx->sample_fmt = SAMPLE_FMT_S16; 602 603 return 0; 604} 605 606static av_cold int alac_decode_close(AVCodecContext *avctx) 607{ 608 ALACContext *alac = avctx->priv_data; 609 610 int chan; 611 for (chan = 0; chan < MAX_CHANNELS; chan++) { 612 av_free(alac->predicterror_buffer[chan]); 613 av_free(alac->outputsamples_buffer[chan]); 614 } 615 616 return 0; 617} 618 619AVCodec alac_decoder = { 620 "alac", 621 CODEC_TYPE_AUDIO, 622 CODEC_ID_ALAC, 623 sizeof(ALACContext), 624 alac_decode_init, 625 NULL, 626 alac_decode_close, 627 alac_decode_frame, 628 .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"), 629}; 630