1/* 2 * ALAC (Apple Lossless Audio Codec) decoder 3 * Copyright (c) 2005 David Hammerton 4 * 5 * This file is part of FFmpeg. 6 * 7 * FFmpeg is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Lesser General Public 9 * License as published by the Free Software Foundation; either 10 * version 2.1 of the License, or (at your option) any later version. 11 * 12 * FFmpeg is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Lesser General Public License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public 18 * License along with FFmpeg; if not, write to the Free Software 19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 */ 21 22/** 23 * @file 24 * ALAC (Apple Lossless Audio Codec) decoder 25 * @author 2005 David Hammerton 26 * 27 * For more information on the ALAC format, visit: 28 * http://crazney.net/programs/itunes/alac.html 29 * 30 * Note: This decoder expects a 36- (0x24-)byte QuickTime atom to be 31 * passed through the extradata[_size] fields. This atom is tacked onto 32 * the end of an 'alac' stsd atom and has the following format: 33 * bytes 0-3 atom size (0x24), big-endian 34 * bytes 4-7 atom type ('alac', not the 'alac' tag from start of stsd) 35 * bytes 8-35 data bytes needed by decoder 36 * 37 * Extradata: 38 * 32bit size 39 * 32bit tag (=alac) 40 * 32bit zero? 41 * 32bit max sample per frame 42 * 8bit ?? (zero?) 43 * 8bit sample size 44 * 8bit history mult 45 * 8bit initial history 46 * 8bit kmodifier 47 * 8bit channels? 48 * 16bit ?? 49 * 32bit max coded frame size 50 * 32bit bitrate? 51 * 32bit samplerate 52 */ 53 54 55#include "avcodec.h" 56#include "get_bits.h" 57#include "bytestream.h" 58#include "unary.h" 59#include "mathops.h" 60 61#define ALAC_EXTRADATA_SIZE 36 62#define MAX_CHANNELS 2 63 64typedef struct { 65 66 AVCodecContext *avctx; 67 GetBitContext gb; 68 /* init to 0; first frame decode should initialize from extradata and 69 * set this to 1 */ 70 int context_initialized; 71 72 int numchannels; 73 int bytespersample; 74 75 /* buffers */ 76 int32_t *predicterror_buffer[MAX_CHANNELS]; 77 78 int32_t *outputsamples_buffer[MAX_CHANNELS]; 79 80 int32_t *wasted_bits_buffer[MAX_CHANNELS]; 81 82 /* stuff from setinfo */ 83 uint32_t setinfo_max_samples_per_frame; /* 0x1000 = 4096 */ /* max samples per frame? */ 84 uint8_t setinfo_sample_size; /* 0x10 */ 85 uint8_t setinfo_rice_historymult; /* 0x28 */ 86 uint8_t setinfo_rice_initialhistory; /* 0x0a */ 87 uint8_t setinfo_rice_kmodifier; /* 0x0e */ 88 /* end setinfo stuff */ 89 90 int wasted_bits; 91} ALACContext; 92 93static void allocate_buffers(ALACContext *alac) 94{ 95 int chan; 96 for (chan = 0; chan < MAX_CHANNELS; chan++) { 97 alac->predicterror_buffer[chan] = 98 av_malloc(alac->setinfo_max_samples_per_frame * 4); 99 100 alac->outputsamples_buffer[chan] = 101 av_malloc(alac->setinfo_max_samples_per_frame * 4); 102 103 alac->wasted_bits_buffer[chan] = av_malloc(alac->setinfo_max_samples_per_frame * 4); 104 } 105} 106 107static int alac_set_info(ALACContext *alac) 108{ 109 const unsigned char *ptr = alac->avctx->extradata; 110 111 ptr += 4; /* size */ 112 ptr += 4; /* alac */ 113 ptr += 4; /* 0 ? */ 114 115 if(AV_RB32(ptr) >= UINT_MAX/4){ 116 av_log(alac->avctx, AV_LOG_ERROR, "setinfo_max_samples_per_frame too large\n"); 117 return -1; 118 } 119 120 /* buffer size / 2 ? */ 121 alac->setinfo_max_samples_per_frame = bytestream_get_be32(&ptr); 122 ptr++; /* ??? */ 123 alac->setinfo_sample_size = *ptr++; 124 if (alac->setinfo_sample_size > 32) { 125 av_log(alac->avctx, AV_LOG_ERROR, "setinfo_sample_size too large\n"); 126 return -1; 127 } 128 alac->setinfo_rice_historymult = *ptr++; 129 alac->setinfo_rice_initialhistory = *ptr++; 130 alac->setinfo_rice_kmodifier = *ptr++; 131 ptr++; /* channels? */ 132 bytestream_get_be16(&ptr); /* ??? */ 133 bytestream_get_be32(&ptr); /* max coded frame size */ 134 bytestream_get_be32(&ptr); /* bitrate ? */ 135 bytestream_get_be32(&ptr); /* samplerate */ 136 137 allocate_buffers(alac); 138 139 return 0; 140} 141 142static inline int decode_scalar(GetBitContext *gb, int k, int limit, int readsamplesize){ 143 /* read x - number of 1s before 0 represent the rice */ 144 int x = get_unary_0_9(gb); 145 146 if (x > 8) { /* RICE THRESHOLD */ 147 /* use alternative encoding */ 148 x = get_bits(gb, readsamplesize); 149 } else { 150 if (k >= limit) 151 k = limit; 152 153 if (k != 1) { 154 int extrabits = show_bits(gb, k); 155 156 /* multiply x by 2^k - 1, as part of their strange algorithm */ 157 x = (x << k) - x; 158 159 if (extrabits > 1) { 160 x += extrabits - 1; 161 skip_bits(gb, k); 162 } else 163 skip_bits(gb, k - 1); 164 } 165 } 166 return x; 167} 168 169static void bastardized_rice_decompress(ALACContext *alac, 170 int32_t *output_buffer, 171 int output_size, 172 int readsamplesize, /* arg_10 */ 173 int rice_initialhistory, /* arg424->b */ 174 int rice_kmodifier, /* arg424->d */ 175 int rice_historymult, /* arg424->c */ 176 int rice_kmodifier_mask /* arg424->e */ 177 ) 178{ 179 int output_count; 180 unsigned int history = rice_initialhistory; 181 int sign_modifier = 0; 182 183 for (output_count = 0; output_count < output_size; output_count++) { 184 int32_t x; 185 int32_t x_modified; 186 int32_t final_val; 187 188 /* standard rice encoding */ 189 int k; /* size of extra bits */ 190 191 /* read k, that is bits as is */ 192 k = av_log2((history >> 9) + 3); 193 x= decode_scalar(&alac->gb, k, rice_kmodifier, readsamplesize); 194 195 x_modified = sign_modifier + x; 196 final_val = (x_modified + 1) / 2; 197 if (x_modified & 1) final_val *= -1; 198 199 output_buffer[output_count] = final_val; 200 201 sign_modifier = 0; 202 203 /* now update the history */ 204 history += x_modified * rice_historymult 205 - ((history * rice_historymult) >> 9); 206 207 if (x_modified > 0xffff) 208 history = 0xffff; 209 210 /* special case: there may be compressed blocks of 0 */ 211 if ((history < 128) && (output_count+1 < output_size)) { 212 int k; 213 unsigned int block_size; 214 215 sign_modifier = 1; 216 217 k = 7 - av_log2(history) + ((history + 16) >> 6 /* / 64 */); 218 219 block_size= decode_scalar(&alac->gb, k, rice_kmodifier, 16); 220 221 if (block_size > 0) { 222 if(block_size >= output_size - output_count){ 223 av_log(alac->avctx, AV_LOG_ERROR, "invalid zero block size of %d %d %d\n", block_size, output_size, output_count); 224 block_size= output_size - output_count - 1; 225 } 226 memset(&output_buffer[output_count+1], 0, block_size * 4); 227 output_count += block_size; 228 } 229 230 if (block_size > 0xffff) 231 sign_modifier = 0; 232 233 history = 0; 234 } 235 } 236} 237 238static inline int sign_only(int v) 239{ 240 return v ? FFSIGN(v) : 0; 241} 242 243static void predictor_decompress_fir_adapt(int32_t *error_buffer, 244 int32_t *buffer_out, 245 int output_size, 246 int readsamplesize, 247 int16_t *predictor_coef_table, 248 int predictor_coef_num, 249 int predictor_quantitization) 250{ 251 int i; 252 253 /* first sample always copies */ 254 *buffer_out = *error_buffer; 255 256 if (!predictor_coef_num) { 257 if (output_size <= 1) 258 return; 259 260 memcpy(buffer_out+1, error_buffer+1, (output_size-1) * 4); 261 return; 262 } 263 264 if (predictor_coef_num == 0x1f) { /* 11111 - max value of predictor_coef_num */ 265 /* second-best case scenario for fir decompression, 266 * error describes a small difference from the previous sample only 267 */ 268 if (output_size <= 1) 269 return; 270 for (i = 0; i < output_size - 1; i++) { 271 int32_t prev_value; 272 int32_t error_value; 273 274 prev_value = buffer_out[i]; 275 error_value = error_buffer[i+1]; 276 buffer_out[i+1] = 277 sign_extend((prev_value + error_value), readsamplesize); 278 } 279 return; 280 } 281 282 /* read warm-up samples */ 283 if (predictor_coef_num > 0) 284 for (i = 0; i < predictor_coef_num; i++) { 285 int32_t val; 286 287 val = buffer_out[i] + error_buffer[i+1]; 288 val = sign_extend(val, readsamplesize); 289 buffer_out[i+1] = val; 290 } 291 292#if 0 293 /* 4 and 8 are very common cases (the only ones i've seen). these 294 * should be unrolled and optimized 295 */ 296 if (predictor_coef_num == 4) { 297 /* FIXME: optimized general case */ 298 return; 299 } 300 301 if (predictor_coef_table == 8) { 302 /* FIXME: optimized general case */ 303 return; 304 } 305#endif 306 307 /* general case */ 308 if (predictor_coef_num > 0) { 309 for (i = predictor_coef_num + 1; i < output_size; i++) { 310 int j; 311 int sum = 0; 312 int outval; 313 int error_val = error_buffer[i]; 314 315 for (j = 0; j < predictor_coef_num; j++) { 316 sum += (buffer_out[predictor_coef_num-j] - buffer_out[0]) * 317 predictor_coef_table[j]; 318 } 319 320 outval = (1 << (predictor_quantitization-1)) + sum; 321 outval = outval >> predictor_quantitization; 322 outval = outval + buffer_out[0] + error_val; 323 outval = sign_extend(outval, readsamplesize); 324 325 buffer_out[predictor_coef_num+1] = outval; 326 327 if (error_val > 0) { 328 int predictor_num = predictor_coef_num - 1; 329 330 while (predictor_num >= 0 && error_val > 0) { 331 int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num]; 332 int sign = sign_only(val); 333 334 predictor_coef_table[predictor_num] -= sign; 335 336 val *= sign; /* absolute value */ 337 338 error_val -= ((val >> predictor_quantitization) * 339 (predictor_coef_num - predictor_num)); 340 341 predictor_num--; 342 } 343 } else if (error_val < 0) { 344 int predictor_num = predictor_coef_num - 1; 345 346 while (predictor_num >= 0 && error_val < 0) { 347 int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num]; 348 int sign = - sign_only(val); 349 350 predictor_coef_table[predictor_num] -= sign; 351 352 val *= sign; /* neg value */ 353 354 error_val -= ((val >> predictor_quantitization) * 355 (predictor_coef_num - predictor_num)); 356 357 predictor_num--; 358 } 359 } 360 361 buffer_out++; 362 } 363 } 364} 365 366static void reconstruct_stereo_16(int32_t *buffer[MAX_CHANNELS], 367 int16_t *buffer_out, 368 int numchannels, int numsamples, 369 uint8_t interlacing_shift, 370 uint8_t interlacing_leftweight) 371{ 372 int i; 373 if (numsamples <= 0) 374 return; 375 376 /* weighted interlacing */ 377 if (interlacing_leftweight) { 378 for (i = 0; i < numsamples; i++) { 379 int32_t a, b; 380 381 a = buffer[0][i]; 382 b = buffer[1][i]; 383 384 a -= (b * interlacing_leftweight) >> interlacing_shift; 385 b += a; 386 387 buffer_out[i*numchannels] = b; 388 buffer_out[i*numchannels + 1] = a; 389 } 390 391 return; 392 } 393 394 /* otherwise basic interlacing took place */ 395 for (i = 0; i < numsamples; i++) { 396 int16_t left, right; 397 398 left = buffer[0][i]; 399 right = buffer[1][i]; 400 401 buffer_out[i*numchannels] = left; 402 buffer_out[i*numchannels + 1] = right; 403 } 404} 405 406static void decorrelate_stereo_24(int32_t *buffer[MAX_CHANNELS], 407 int32_t *buffer_out, 408 int32_t *wasted_bits_buffer[MAX_CHANNELS], 409 int wasted_bits, 410 int numchannels, int numsamples, 411 uint8_t interlacing_shift, 412 uint8_t interlacing_leftweight) 413{ 414 int i; 415 416 if (numsamples <= 0) 417 return; 418 419 /* weighted interlacing */ 420 if (interlacing_leftweight) { 421 for (i = 0; i < numsamples; i++) { 422 int32_t a, b; 423 424 a = buffer[0][i]; 425 b = buffer[1][i]; 426 427 a -= (b * interlacing_leftweight) >> interlacing_shift; 428 b += a; 429 430 if (wasted_bits) { 431 b = (b << wasted_bits) | wasted_bits_buffer[0][i]; 432 a = (a << wasted_bits) | wasted_bits_buffer[1][i]; 433 } 434 435 buffer_out[i * numchannels] = b << 8; 436 buffer_out[i * numchannels + 1] = a << 8; 437 } 438 } else { 439 for (i = 0; i < numsamples; i++) { 440 int32_t left, right; 441 442 left = buffer[0][i]; 443 right = buffer[1][i]; 444 445 if (wasted_bits) { 446 left = (left << wasted_bits) | wasted_bits_buffer[0][i]; 447 right = (right << wasted_bits) | wasted_bits_buffer[1][i]; 448 } 449 450 buffer_out[i * numchannels] = left << 8; 451 buffer_out[i * numchannels + 1] = right << 8; 452 } 453 } 454} 455 456static int alac_decode_frame(AVCodecContext *avctx, 457 void *outbuffer, int *outputsize, 458 AVPacket *avpkt) 459{ 460 const uint8_t *inbuffer = avpkt->data; 461 int input_buffer_size = avpkt->size; 462 ALACContext *alac = avctx->priv_data; 463 464 int channels; 465 unsigned int outputsamples; 466 int hassize; 467 unsigned int readsamplesize; 468 int isnotcompressed; 469 uint8_t interlacing_shift; 470 uint8_t interlacing_leftweight; 471 472 /* short-circuit null buffers */ 473 if (!inbuffer || !input_buffer_size) 474 return input_buffer_size; 475 476 /* initialize from the extradata */ 477 if (!alac->context_initialized) { 478 if (alac->avctx->extradata_size != ALAC_EXTRADATA_SIZE) { 479 av_log(avctx, AV_LOG_ERROR, "alac: expected %d extradata bytes\n", 480 ALAC_EXTRADATA_SIZE); 481 return input_buffer_size; 482 } 483 if (alac_set_info(alac)) { 484 av_log(avctx, AV_LOG_ERROR, "alac: set_info failed\n"); 485 return input_buffer_size; 486 } 487 alac->context_initialized = 1; 488 } 489 490 init_get_bits(&alac->gb, inbuffer, input_buffer_size * 8); 491 492 channels = get_bits(&alac->gb, 3) + 1; 493 if (channels > MAX_CHANNELS) { 494 av_log(avctx, AV_LOG_ERROR, "channels > %d not supported\n", 495 MAX_CHANNELS); 496 return input_buffer_size; 497 } 498 499 /* 2^result = something to do with output waiting. 500 * perhaps matters if we read > 1 frame in a pass? 501 */ 502 skip_bits(&alac->gb, 4); 503 504 skip_bits(&alac->gb, 12); /* unknown, skip 12 bits */ 505 506 /* the output sample size is stored soon */ 507 hassize = get_bits1(&alac->gb); 508 509 alac->wasted_bits = get_bits(&alac->gb, 2) << 3; 510 511 /* whether the frame is compressed */ 512 isnotcompressed = get_bits1(&alac->gb); 513 514 if (hassize) { 515 /* now read the number of samples as a 32bit integer */ 516 outputsamples = get_bits_long(&alac->gb, 32); 517 if(outputsamples > alac->setinfo_max_samples_per_frame){ 518 av_log(avctx, AV_LOG_ERROR, "outputsamples %d > %d\n", outputsamples, alac->setinfo_max_samples_per_frame); 519 return -1; 520 } 521 } else 522 outputsamples = alac->setinfo_max_samples_per_frame; 523 524 switch (alac->setinfo_sample_size) { 525 case 16: avctx->sample_fmt = SAMPLE_FMT_S16; 526 alac->bytespersample = channels << 1; 527 break; 528 case 24: avctx->sample_fmt = SAMPLE_FMT_S32; 529 alac->bytespersample = channels << 2; 530 break; 531 default: av_log(avctx, AV_LOG_ERROR, "Sample depth %d is not supported.\n", 532 alac->setinfo_sample_size); 533 return -1; 534 } 535 536 if(outputsamples > *outputsize / alac->bytespersample){ 537 av_log(avctx, AV_LOG_ERROR, "sample buffer too small\n"); 538 return -1; 539 } 540 541 *outputsize = outputsamples * alac->bytespersample; 542 readsamplesize = alac->setinfo_sample_size - (alac->wasted_bits) + channels - 1; 543 if (readsamplesize > MIN_CACHE_BITS) { 544 av_log(avctx, AV_LOG_ERROR, "readsamplesize too big (%d)\n", readsamplesize); 545 return -1; 546 } 547 548 if (!isnotcompressed) { 549 /* so it is compressed */ 550 int16_t predictor_coef_table[channels][32]; 551 int predictor_coef_num[channels]; 552 int prediction_type[channels]; 553 int prediction_quantitization[channels]; 554 int ricemodifier[channels]; 555 int i, chan; 556 557 interlacing_shift = get_bits(&alac->gb, 8); 558 interlacing_leftweight = get_bits(&alac->gb, 8); 559 560 for (chan = 0; chan < channels; chan++) { 561 prediction_type[chan] = get_bits(&alac->gb, 4); 562 prediction_quantitization[chan] = get_bits(&alac->gb, 4); 563 564 ricemodifier[chan] = get_bits(&alac->gb, 3); 565 predictor_coef_num[chan] = get_bits(&alac->gb, 5); 566 567 /* read the predictor table */ 568 for (i = 0; i < predictor_coef_num[chan]; i++) 569 predictor_coef_table[chan][i] = (int16_t)get_bits(&alac->gb, 16); 570 } 571 572 if (alac->wasted_bits) { 573 int i, ch; 574 for (i = 0; i < outputsamples; i++) { 575 for (ch = 0; ch < channels; ch++) 576 alac->wasted_bits_buffer[ch][i] = get_bits(&alac->gb, alac->wasted_bits); 577 } 578 } 579 for (chan = 0; chan < channels; chan++) { 580 bastardized_rice_decompress(alac, 581 alac->predicterror_buffer[chan], 582 outputsamples, 583 readsamplesize, 584 alac->setinfo_rice_initialhistory, 585 alac->setinfo_rice_kmodifier, 586 ricemodifier[chan] * alac->setinfo_rice_historymult / 4, 587 (1 << alac->setinfo_rice_kmodifier) - 1); 588 589 if (prediction_type[chan] == 0) { 590 /* adaptive fir */ 591 predictor_decompress_fir_adapt(alac->predicterror_buffer[chan], 592 alac->outputsamples_buffer[chan], 593 outputsamples, 594 readsamplesize, 595 predictor_coef_table[chan], 596 predictor_coef_num[chan], 597 prediction_quantitization[chan]); 598 } else { 599 av_log(avctx, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type[chan]); 600 /* I think the only other prediction type (or perhaps this is 601 * just a boolean?) runs adaptive fir twice.. like: 602 * predictor_decompress_fir_adapt(predictor_error, tempout, ...) 603 * predictor_decompress_fir_adapt(predictor_error, outputsamples ...) 604 * little strange.. 605 */ 606 } 607 } 608 } else { 609 /* not compressed, easy case */ 610 int i, chan; 611 if (alac->setinfo_sample_size <= 16) { 612 for (i = 0; i < outputsamples; i++) 613 for (chan = 0; chan < channels; chan++) { 614 int32_t audiobits; 615 616 audiobits = get_sbits_long(&alac->gb, alac->setinfo_sample_size); 617 618 alac->outputsamples_buffer[chan][i] = audiobits; 619 } 620 } else { 621 for (i = 0; i < outputsamples; i++) { 622 for (chan = 0; chan < channels; chan++) { 623 alac->outputsamples_buffer[chan][i] = get_bits(&alac->gb, 624 alac->setinfo_sample_size); 625 alac->outputsamples_buffer[chan][i] = sign_extend(alac->outputsamples_buffer[chan][i], 626 alac->setinfo_sample_size); 627 } 628 } 629 } 630 alac->wasted_bits = 0; 631 interlacing_shift = 0; 632 interlacing_leftweight = 0; 633 } 634 if (get_bits(&alac->gb, 3) != 7) 635 av_log(avctx, AV_LOG_ERROR, "Error : Wrong End Of Frame\n"); 636 637 switch(alac->setinfo_sample_size) { 638 case 16: 639 if (channels == 2) { 640 reconstruct_stereo_16(alac->outputsamples_buffer, 641 (int16_t*)outbuffer, 642 alac->numchannels, 643 outputsamples, 644 interlacing_shift, 645 interlacing_leftweight); 646 } else { 647 int i; 648 for (i = 0; i < outputsamples; i++) { 649 ((int16_t*)outbuffer)[i] = alac->outputsamples_buffer[0][i]; 650 } 651 } 652 break; 653 case 24: 654 if (channels == 2) { 655 decorrelate_stereo_24(alac->outputsamples_buffer, 656 outbuffer, 657 alac->wasted_bits_buffer, 658 alac->wasted_bits, 659 alac->numchannels, 660 outputsamples, 661 interlacing_shift, 662 interlacing_leftweight); 663 } else { 664 int i; 665 for (i = 0; i < outputsamples; i++) 666 ((int32_t *)outbuffer)[i] = alac->outputsamples_buffer[0][i] << 8; 667 } 668 break; 669 } 670 671 if (input_buffer_size * 8 - get_bits_count(&alac->gb) > 8) 672 av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n", input_buffer_size * 8 - get_bits_count(&alac->gb)); 673 674 return input_buffer_size; 675} 676 677static av_cold int alac_decode_init(AVCodecContext * avctx) 678{ 679 ALACContext *alac = avctx->priv_data; 680 alac->avctx = avctx; 681 alac->context_initialized = 0; 682 683 alac->numchannels = alac->avctx->channels; 684 685 return 0; 686} 687 688static av_cold int alac_decode_close(AVCodecContext *avctx) 689{ 690 ALACContext *alac = avctx->priv_data; 691 692 int chan; 693 for (chan = 0; chan < MAX_CHANNELS; chan++) { 694 av_freep(&alac->predicterror_buffer[chan]); 695 av_freep(&alac->outputsamples_buffer[chan]); 696 av_freep(&alac->wasted_bits_buffer[chan]); 697 } 698 699 return 0; 700} 701 702AVCodec alac_decoder = { 703 "alac", 704 AVMEDIA_TYPE_AUDIO, 705 CODEC_ID_ALAC, 706 sizeof(ALACContext), 707 alac_decode_init, 708 NULL, 709 alac_decode_close, 710 alac_decode_frame, 711 .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"), 712}; 713