feeder_rate.c revision 110108
1/* 2 * Copyright (c) 2003 Orion Hodson <orion@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * MAINTAINER: Orion Hodson <orion@freebsd.org> 27 * 28 * This rate conversion code uses linear interpolation without any 29 * pre- or post- interpolation filtering to combat aliasing. This 30 * greatly limits the sound quality and should be addressed at some 31 * stage in the future. 32 * 33 * Since this accuracy of interpolation is sensitive and examination 34 * of the algorithm output is harder from the kernel, th code is 35 * designed to be compiled in the kernel and in a userland test 36 * harness. This is done by selectively including and excluding code 37 * with several portions based on whether _KERNEL is defined. It's a 38 * little ugly, but exceedingly useful. The testsuite and its 39 * revisions can be found at: 40 * http://people.freebsd.org/~orion/feedrate/ 41 * 42 * Special thanks to Ken Marx for exposing flaws in the code and for 43 * testing revisions. 44 */ 45 46#ifdef _KERNEL 47 48#include <dev/sound/pcm/sound.h> 49#include "feeder_if.h" 50 51SND_DECLARE_FILE("$FreeBSD: head/sys/dev/sound/pcm/feeder_rate.c 110108 2003-01-30 16:32:56Z orion $"); 52MALLOC_DEFINE(M_RATEFEEDER, "ratefeed", "pcm rate feeder"); 53 54#define RATE_ASSERT(x, y) /* KASSERT(x,y) */ 55#define RATE_TRACE(x...) /* printf(x) */ 56 57#endif /* _KERNEL */ 58 59/*****************************************************************************/ 60/* All of the following coefficients are coupled. They are chosen to be 61 * good in the operating space 4000-96000kHz work. Decreasing the 62 * granularity increases the required buffer size and affects the gain 63 * values at different points in the space. These values were found by 64 * running the test program with -p (probe) and some trial and error. 65 * 66 * ROUNDHZ the granularity of sample rates (fits n*11025 and n*8000). 67 * FEEDBUFSZ the amount of buffer space. 68 * MINGAIN the minimum acceptable gain in coefficients search. 69 */ 70#define ROUNDHZ 25 71#define FEEDBUFSZ 8192 72#define MINGAIN 92 73 74#define RATEMIN 4000 75#define RATEMAX 96000 76 77struct feed_rate_info; 78 79typedef int (*rate_convert_method)(struct feed_rate_info *, 80 uint32_t, uint32_t, int16_t *); 81 82static int 83convert_stereo_up(struct feed_rate_info *info, 84 uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst); 85 86static int 87convert_stereo_down(struct feed_rate_info *info, 88 uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst); 89 90struct feed_rate_info { 91 uint32_t src, dst; /* source and destination rates */ 92 uint16_t buffer_ticks; /* number of available samples in buffer */ 93 uint16_t buffer_pos; /* next available sample in buffer */ 94 uint16_t rounds; /* maximum number of cycle rounds w buffer */ 95 uint16_t alpha; /* interpolation distance */ 96 uint16_t sscale; /* src clock scale */ 97 uint16_t dscale; /* dst clock scale */ 98 uint16_t mscale; /* scale factor to avoid divide per sample */ 99 uint16_t mroll; /* roll to again avoid divide per sample */ 100 uint16_t channels; /* 1 = mono, 2 = stereo */ 101 102 rate_convert_method convert; 103 int16_t buffer[FEEDBUFSZ]; 104}; 105 106#define bytes_per_sample 2 107#define src_ticks_per_cycle(info) (info->dscale * info->rounds) 108#define dst_ticks_per_cycle(info) (info->sscale * info->rounds) 109#define bytes_per_tick(info) (info->channels * bytes_per_sample) 110#define src_bytes_per_cycle(info) \ 111 (src_ticks_per_cycle(info) * bytes_per_tick(info)) 112#define dst_bytes_per_cycle(info) \ 113 (dst_ticks_per_cycle(info) * bytes_per_tick(info)) 114 115static uint32_t 116gcd(uint32_t x, uint32_t y) 117{ 118 uint32_t w; 119 while (y != 0) { 120 w = x % y; 121 x = y; 122 y = w; 123 } 124 return x; 125} 126 127static int 128feed_rate_setup(struct pcm_feeder *f) 129{ 130 struct feed_rate_info *info = f->data; 131 uint32_t mscale, mroll, l, r, g; 132 133 /* Beat sample rates down by greatest common divisor */ 134 g = gcd(info->src, info->dst); 135 info->sscale = info->dst / g; 136 info->dscale = info->src / g; 137 138 info->alpha = 0; 139 info->buffer_ticks = 0; 140 info->buffer_pos = 0; 141 142 /* Pick suitable conversion routine */ 143 if (info->src > info->dst) { 144 info->convert = convert_stereo_down; 145 } else { 146 info->convert = convert_stereo_up; 147 } 148 149 /* 150 * Determine number of conversion rounds that will fit into 151 * buffer. NB Must set info->rounds to one before using 152 * src_ticks_per_cycle here since it used by src_ticks_per_cycle. 153 */ 154 info->rounds = 1; 155 r = (FEEDBUFSZ - bytes_per_tick(info)) / 156 (src_ticks_per_cycle(info) * bytes_per_tick(info)); 157 if (r == 0) { 158 RATE_TRACE("Insufficient buffer space for conversion %d -> %d " 159 "(%d < %d)\n", info->src, info->dst, FEEDBUFSZ, 160 src_ticks_per_cycle(info) * bytes_per_tick(info)); 161 return -1; 162 } 163 info->rounds = r; 164 165 /* 166 * Find scale and roll combination that allows us to trade 167 * costly divide operations in the main loop for multiply-rolls. 168 */ 169 for (l = 96; l >= MINGAIN; l -= 3) { 170 for (mroll = 0; mroll < 16; mroll ++) { 171 mscale = (1 << mroll) / info->sscale; 172 173 r = (mscale * info->sscale * 100) >> mroll; 174 if (r > l && r <= 100) { 175 info->mscale = mscale; 176 info->mroll = mroll; 177 RATE_TRACE("Converting %d to %d with " 178 "mscale = %d and mroll = %d " 179 "(gain = %d / 100)\n", 180 info->src, info->dst, 181 info->mscale, info->mroll, r); 182 return 0; 183 } 184 } 185 } 186 187 RATE_TRACE("Failed to find a converter within %d%% gain for " 188 "%d to %d.\n", l, info->src, info->dst); 189 190 return -2; 191} 192 193static int 194feed_rate_set(struct pcm_feeder *f, int what, int value) 195{ 196 struct feed_rate_info *info = f->data; 197 int rvalue; 198 199 if (value < RATEMIN || value > RATEMAX) { 200 return -1; 201 } 202 203 rvalue = (value / ROUNDHZ) * ROUNDHZ; 204 if (value - rvalue > ROUNDHZ / 2) { 205 rvalue += ROUNDHZ; 206 } 207 208 switch(what) { 209 case FEEDRATE_SRC: 210 info->src = rvalue; 211 break; 212 case FEEDRATE_DST: 213 info->dst = rvalue; 214 break; 215 default: 216 return -1; 217 } 218 219 return feed_rate_setup(f); 220} 221 222static int 223feed_rate_get(struct pcm_feeder *f, int what) 224{ 225 struct feed_rate_info *info = f->data; 226 227 switch(what) { 228 case FEEDRATE_SRC: 229 return info->src; 230 case FEEDRATE_DST: 231 return info->dst; 232 default: 233 return -1; 234 } 235 return -1; 236} 237 238static int 239feed_rate_init(struct pcm_feeder *f) 240{ 241 struct feed_rate_info *info; 242 243 info = malloc(sizeof(*info), M_RATEFEEDER, M_NOWAIT | M_ZERO); 244 if (info == NULL) 245 return ENOMEM; 246 247 info->src = DSP_DEFAULT_SPEED; 248 info->dst = DSP_DEFAULT_SPEED; 249 info->channels = 2; 250 251 f->data = info; 252 return 0; 253} 254 255static int 256feed_rate_free(struct pcm_feeder *f) 257{ 258 struct feed_rate_info *info = f->data; 259 260 if (info) { 261 free(info, M_RATEFEEDER); 262 } 263 f->data = NULL; 264 return 0; 265} 266 267static int 268convert_stereo_up(struct feed_rate_info *info, 269 uint32_t src_ticks, 270 uint32_t dst_ticks, 271 int16_t *dst) 272{ 273 uint32_t max_dst_ticks; 274 int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o; 275 int16_t *src; 276 277 sp = info->buffer_pos * 2; 278 se = sp + src_ticks * 2; 279 280 src = info->buffer; 281 alpha = info->alpha * info->mscale; 282 dalpha = info->dscale * info->mscale; /* Alpha increment */ 283 malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */ 284 mroll = info->mroll; 285 286 /* 287 * For efficiency the main conversion loop should only depend on 288 * one variable. We use the state to work out the maximum number 289 * of output samples that are available and eliminate the checking of 290 * sp from the loop. 291 */ 292 max_dst_ticks = src_ticks * info->dst / info->src - alpha / dalpha; 293 if (max_dst_ticks < dst_ticks) { 294 dst_ticks = max_dst_ticks; 295 } 296 297 dp = 0; 298 de = dst_ticks * 2; 299 /* 300 * Unrolling this loop manually does not help much here because 301 * of the alpha, malpha comparison. 302 */ 303 while (dp < de) { 304 o = malpha - alpha; 305 x = alpha * src[sp + 2] + o * src[sp]; 306 dst[dp++] = x >> mroll; 307 x = alpha * src[sp + 3] + o * src[sp + 1]; 308 dst[dp++] = x >> mroll; 309 alpha += dalpha; 310 if (alpha >= malpha) { 311 alpha -= malpha; 312 sp += 2; 313 } 314 } 315 RATE_ASSERT(sp <= se, ("%s: Source overrun\n", __func__)); 316 317 info->buffer_pos = sp / info->channels; 318 info->alpha = alpha / info->mscale; 319 320 return dp / info->channels; 321} 322 323static int 324convert_stereo_down(struct feed_rate_info *info, 325 uint32_t src_ticks, 326 uint32_t dst_ticks, 327 int16_t *dst) 328{ 329 int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o, m, 330 mdalpha, mstep; 331 int16_t *src; 332 333 sp = info->buffer_pos * 2; 334 se = sp + src_ticks * 2; 335 336 src = info->buffer; 337 alpha = info->alpha * info->mscale; 338 dalpha = info->dscale * info->mscale; /* Alpha increment */ 339 malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */ 340 mroll = info->mroll; 341 342 dp = 0; 343 de = dst_ticks * 2; 344 345 m = dalpha / malpha; 346 mstep = m * 2; 347 mdalpha = dalpha - m * malpha; 348 349 /* 350 * TODO: eliminate sp or dp from this loop comparison for a few 351 * extra % performance. 352 */ 353 while (sp < se && dp < de) { 354 o = malpha - alpha; 355 x = alpha * src[sp + 2] + o * src[sp]; 356 dst[dp++] = x >> mroll; 357 x = alpha * src[sp + 3] + o * src[sp + 1]; 358 dst[dp++] = x >> mroll; 359 360 alpha += mdalpha; 361 sp += mstep; 362 if (alpha >= malpha) { 363 alpha -= malpha; 364 sp += 2; 365 } 366 } 367 368 info->buffer_pos = sp / 2; 369 info->alpha = alpha / info->mscale; 370 371 RATE_ASSERT(info->buffer_pos <= info->buffer_ticks, 372 ("%s: Source overrun\n", __func__)); 373 374 return dp / 2; 375} 376 377static int 378feed_rate(struct pcm_feeder *f, 379 struct pcm_channel *c, 380 uint8_t *b, 381 uint32_t count, 382 void *source) 383{ 384 struct feed_rate_info *info = f->data; 385 386 uint32_t done, s_ticks, d_ticks; 387 done = 0; 388 389 RATE_ASSERT(info->channels == 2, 390 ("%s: channels (%d) != 2", __func__, info->channels)); 391 392 while (done < count) { 393 /* Slurp in more data if input buffer is not full */ 394 while (info->buffer_ticks < src_ticks_per_cycle(info)) { 395 uint8_t *u8b; 396 int fetch; 397 fetch = src_bytes_per_cycle(info) - 398 info->buffer_ticks * bytes_per_tick(info); 399 u8b = (uint8_t*)info->buffer + 400 (info->buffer_ticks + 1) * 401 bytes_per_tick(info); 402 fetch = FEEDER_FEED(f->source, c, u8b, fetch, source); 403 RATE_ASSERT(fetch % bytes_per_tick(info) == 0, 404 ("%s: fetched unaligned bytes (%d)", 405 __func__, fetch)); 406 info->buffer_ticks += fetch / bytes_per_tick(info); 407 RATE_ASSERT(src_ticks_per_cycle(info) >= 408 info->buffer_ticks, 409 ("%s: buffer overfilled (%d > %d).", 410 __func__, info->buffer_ticks, 411 src_ticks_per_cycle(info))); 412 if (fetch == 0) 413 break; 414 } 415 416 /* Find amount of input buffer data that should be processed */ 417 d_ticks = (count - done) / bytes_per_tick(info); 418 s_ticks = info->buffer_ticks - info->buffer_pos; 419 if (info->buffer_ticks != src_ticks_per_cycle(info)) { 420 if (s_ticks > 8) 421 s_ticks -= 8; 422 else 423 s_ticks = 0; 424 } 425 426 d_ticks = info->convert(info, s_ticks, d_ticks, 427 (int16_t*)(b + done)); 428 if (d_ticks == 0) 429 break; 430 done += d_ticks * bytes_per_tick(info); 431 432 RATE_ASSERT(info->buffer_pos <= info->buffer_ticks, 433 ("%s: buffer_ticks too big\n", __func__)); 434 RATE_TRACE("%s: ticks %5d pos %d\n", __func__, 435 info->buffer_ticks, info->buffer_pos); 436 437 if (src_ticks_per_cycle(info) <= info->buffer_pos) { 438 /* End of cycle reached, copy last samples to start */ 439 uint8_t *u8b; 440 u8b = (uint8_t*)info->buffer; 441 bcopy(u8b + src_bytes_per_cycle(info), u8b, 442 bytes_per_tick(info)); 443 444 RATE_ASSERT(info->alpha == 0, 445 ("%s: completed cycle with " 446 "alpha non-zero", __func__, info->alpha)); 447 448 info->buffer_pos = 0; 449 info->buffer_ticks = 0; 450 } 451 } 452 453 RATE_ASSERT(count >= done, 454 ("%s: generated too many bytes of data (%d > %d).", 455 __func__, done, count)); 456 457 if (done != count) { 458 RATE_TRACE("Only did %d of %d\n", done, count); 459 } 460 461 return done; 462} 463 464#ifdef _KERNEL 465 466static struct pcm_feederdesc feeder_rate_desc[] = { 467 {FEEDER_RATE, AFMT_S16_LE | AFMT_STEREO, AFMT_S16_LE | AFMT_STEREO, 0}, 468 {0}, 469}; 470static kobj_method_t feeder_rate_methods[] = { 471 KOBJMETHOD(feeder_init, feed_rate_init), 472 KOBJMETHOD(feeder_free, feed_rate_free), 473 KOBJMETHOD(feeder_set, feed_rate_set), 474 KOBJMETHOD(feeder_get, feed_rate_get), 475 KOBJMETHOD(feeder_feed, feed_rate), 476 { 0, 0 } 477}; 478FEEDER_DECLARE(feeder_rate, 2, NULL); 479 480#endif /* _KERNEL */ 481