feeder_rate.c revision 113752
1169691Skan/*
2169691Skan * Copyright (c) 2003 Orion Hodson <orion@freebsd.org>
3169691Skan * All rights reserved.
4169691Skan *
5169691Skan * Redistribution and use in source and binary forms, with or without
6169691Skan * modification, are permitted provided that the following conditions
7169691Skan * are met:
8169691Skan * 1. Redistributions of source code must retain the above copyright
9169691Skan *    notice, this list of conditions and the following disclaimer.
10169691Skan * 2. Redistributions in binary form must reproduce the above copyright
11169691Skan *    notice, this list of conditions and the following disclaimer in the
12169691Skan *    documentation and/or other materials provided with the distribution.
13169691Skan *
14169691Skan * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15169691Skan * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16169691Skan * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17169691Skan * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18169691Skan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19169691Skan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20169691Skan * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21169691Skan * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22169691Skan * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23169691Skan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24169691Skan * SUCH DAMAGE.
25169691Skan *
26169691Skan * MAINTAINER: Orion Hodson <orion@freebsd.org>
27169691Skan *
28169691Skan * This rate conversion code uses linear interpolation without any
29169691Skan * pre- or post- interpolation filtering to combat aliasing.  This
30169691Skan * greatly limits the sound quality and should be addressed at some
31169691Skan * stage in the future.
32169691Skan *
33169691Skan * Since this accuracy of interpolation is sensitive and examination
34169691Skan * of the algorithm output is harder from the kernel, th code is
35169691Skan * designed to be compiled in the kernel and in a userland test
36169691Skan * harness.  This is done by selectively including and excluding code
37169691Skan * with several portions based on whether _KERNEL is defined.  It's a
38169691Skan * little ugly, but exceedingly useful.  The testsuite and its
39169691Skan * revisions can be found at:
40169691Skan *		http://people.freebsd.org/~orion/feedrate/
41169691Skan *
42169691Skan * Special thanks to Ken Marx for exposing flaws in the code and for
43169691Skan * testing revisions.
44169691Skan */
45169691Skan
46169691Skan#ifdef _KERNEL
47169691Skan
48169691Skan#include <dev/sound/pcm/sound.h>
49169691Skan#include "feeder_if.h"
50169691Skan
51169691SkanSND_DECLARE_FILE("$FreeBSD: head/sys/dev/sound/pcm/feeder_rate.c 113752 2003-04-20 17:08:56Z orion $");
52169691Skan
53169691Skan#endif /* _KERNEL */
54169691Skan
55169691SkanMALLOC_DEFINE(M_RATEFEEDER, "ratefeed", "pcm rate feeder");
56169691Skan
57169691Skan#ifndef RATE_ASSERT
58169691Skan#define RATE_ASSERT(x, y) /* KASSERT(x) */
59169691Skan#endif /* RATE_ASSERT */
60169691Skan
61169691Skan#ifndef RATE_TRACE
62169691Skan#define RATE_TRACE(x...)  /* printf(x) */
63169691Skan#endif
64169691Skan
65169691Skan/*****************************************************************************/
66169691Skan
67169691Skan/* The following coefficients are coupled.  They are chosen to be
68169691Skan * guarantee calculable factors for the interpolation routine.  They
69169691Skan * have been tested over the range of RATEMIN-RATEMAX Hz.  Decreasing
70169691Skan * the granularity increases the required buffer size and affects the
71169691Skan * gain values at different points in the space.  These values were
72169691Skan * found by running the test program with -p (probe) and some trial
73169691Skan * and error.
74169691Skan *
75169691Skan * ROUNDHZ	the granularity of sample rates (fits n*11025 and n*8000).
76169691Skan * FEEDBUFSZ	the amount of buffer space.
77169691Skan * MINGAIN	the minimum acceptable gain in coefficients search.
78169691Skan */
79169691Skan#define ROUNDHZ			   25
80169691Skan#define FEEDBUFSZ 		 8192
81169691Skan#define MINGAIN			   92
82169691Skan
83169691Skan#define RATEMIN  		 4000
84169691Skan#define RATEMAX 		48000
85169691Skan
86169691Skanstruct feed_rate_info;
87169691Skan
88169691Skantypedef int (*rate_convert_method)(struct feed_rate_info *,
89169691Skan				   uint32_t, uint32_t, int16_t *);
90169691Skan
91169691Skanstatic int
92169691Skanconvert_stereo_up(struct feed_rate_info *info,
93169691Skan		  uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst);
94169691Skan
95169691Skanstatic int
96169691Skanconvert_stereo_down(struct feed_rate_info *info,
97169691Skan		    uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst);
98169691Skan
99169691Skanstruct feed_rate_info {
100169691Skan	uint32_t src, dst;	/* source and destination rates */
101169691Skan	uint16_t buffer_ticks;	/* number of available samples in buffer */
102169691Skan	uint16_t buffer_pos;	/* next available sample in buffer */
103169691Skan	uint16_t rounds; 	/* maximum number of cycle rounds w buffer */
104169691Skan	uint16_t alpha;		/* interpolation distance */
105169691Skan        uint16_t sscale;        /* src clock scale */
106169691Skan        uint16_t dscale;        /* dst clock scale */
107169691Skan        uint16_t mscale;        /* scale factor to avoid divide per sample */
108169691Skan        uint16_t mroll;         /* roll to again avoid divide per sample */
109169691Skan	uint16_t channels;	/* 1 = mono, 2 = stereo */
110169691Skan
111169691Skan	rate_convert_method convert;
112169691Skan    	int16_t  buffer[FEEDBUFSZ];
113169691Skan};
114169691Skan
115169691Skan#define bytes_per_sample		2
116169691Skan#define src_ticks_per_cycle(info)	(info->dscale * info->rounds)
117169691Skan#define dst_ticks_per_cycle(info)	(info->sscale * info->rounds)
118169691Skan#define bytes_per_tick(info)		(info->channels * bytes_per_sample)
119169691Skan#define src_bytes_per_cycle(info) 					      \
120169691Skan        		(src_ticks_per_cycle(info) * bytes_per_tick(info))
121169691Skan#define dst_bytes_per_cycle(info) 					      \
122169691Skan        		(dst_ticks_per_cycle(info) * bytes_per_tick(info))
123169691Skan
124169691Skanstatic uint32_t
125169691Skangcd(uint32_t x, uint32_t y)
126169691Skan{
127169691Skan	uint32_t w;
128169691Skan	while (y != 0) {
129169691Skan		w = x % y;
130169691Skan		x = y;
131169691Skan		y = w;
132169691Skan	}
133169691Skan	return x;
134169691Skan}
135169691Skan
136169691Skanstatic int
137169691Skanfeed_rate_setup(struct pcm_feeder *f)
138169691Skan{
139169691Skan	struct feed_rate_info *info = f->data;
140169691Skan        uint32_t mscale, mroll, l, r, g;
141169691Skan
142169691Skan	/* Beat sample rates down by greatest common divisor */
143169691Skan	g = gcd(info->src, info->dst);
144169691Skan	info->sscale = info->dst / g;
145169691Skan	info->dscale = info->src / g;
146169691Skan
147169691Skan	info->alpha = 0;
148169691Skan	info->buffer_ticks = 0;
149169691Skan	info->buffer_pos = 0;
150169691Skan
151169691Skan	/* Pick suitable conversion routine */
152169691Skan	if (info->src > info->dst) {
153169691Skan		info->convert = convert_stereo_down;
154169691Skan	} else {
155169691Skan		info->convert = convert_stereo_up;
156169691Skan	}
157169691Skan
158169691Skan	/*
159169691Skan	 * Determine number of conversion rounds that will fit into
160169691Skan	 * buffer.  NB Must set info->rounds to one before using
161169691Skan	 * src_ticks_per_cycle here since it used by src_ticks_per_cycle.
162169691Skan	 */
163169691Skan	info->rounds = 1;
164169691Skan	r = (FEEDBUFSZ - bytes_per_tick(info)) /
165169691Skan		(src_ticks_per_cycle(info) * bytes_per_tick(info));
166169691Skan	if (r == 0) {
167169691Skan		RATE_TRACE("Insufficient buffer space for conversion %d -> %d "
168169691Skan			   "(%d < %d)\n", info->src, info->dst, FEEDBUFSZ,
169169691Skan			   src_ticks_per_cycle(info) * bytes_per_tick(info));
170169691Skan		return -1;
171169691Skan	}
172169691Skan	info->rounds = r;
173169691Skan
174169691Skan	/*
175169691Skan	 * Find scale and roll combination that allows us to trade
176169691Skan	 * costly divide operations in the main loop for multiply-rolls.
177169691Skan	 */
178169691Skan        for (l = 96; l >= MINGAIN; l -= 3) {
179169691Skan		for (mroll = 0; mroll < 16; mroll ++) {
180169691Skan			mscale = (1 << mroll) / info->sscale;
181169691Skan
182169691Skan                        r = (mscale * info->sscale * 100) >> mroll;
183169691Skan                        if (r > l && r <= 100) {
184169691Skan                                info->mscale = mscale;
185169691Skan                                info->mroll = mroll;
186169691Skan                                RATE_TRACE("Converting %d to %d with "
187169691Skan					   "mscale = %d and mroll = %d "
188169691Skan					   "(gain = %d / 100)\n",
189169691Skan					   info->src, info->dst,
190169691Skan					   info->mscale, info->mroll, r);
191169691Skan                                return 0;
192169691Skan                        }
193169691Skan                }
194169691Skan        }
195169691Skan
196169691Skan	RATE_TRACE("Failed to find a converter within %d%% gain for "
197169691Skan		   "%d to %d.\n", l, info->src, info->dst);
198169691Skan
199169691Skan        return -2;
200169691Skan}
201169691Skan
202169691Skanstatic int
203169691Skanfeed_rate_set(struct pcm_feeder *f, int what, int value)
204169691Skan{
205169691Skan	struct feed_rate_info *info = f->data;
206169691Skan	int rvalue;
207169691Skan
208169691Skan	if (value < RATEMIN || value > RATEMAX) {
209169691Skan		return -1;
210169691Skan	}
211169691Skan
212169691Skan	rvalue = (value / ROUNDHZ) * ROUNDHZ;
213169691Skan	if (value - rvalue > ROUNDHZ / 2) {
214169691Skan	    rvalue += ROUNDHZ;
215169691Skan	}
216169691Skan
217169691Skan	switch(what) {
218169691Skan	case FEEDRATE_SRC:
219169691Skan		info->src = rvalue;
220169691Skan		break;
221169691Skan	case FEEDRATE_DST:
222169691Skan		info->dst = rvalue;
223169691Skan		break;
224169691Skan	default:
225169691Skan		return -1;
226169691Skan	}
227169691Skan
228169691Skan	return feed_rate_setup(f);
229169691Skan}
230169691Skan
231169691Skanstatic int
232169691Skanfeed_rate_get(struct pcm_feeder *f, int what)
233169691Skan{
234169691Skan	struct feed_rate_info *info = f->data;
235169691Skan
236169691Skan	switch(what) {
237169691Skan	case FEEDRATE_SRC:
238169691Skan		return info->src;
239169691Skan	case FEEDRATE_DST:
240169691Skan		return info->dst;
241169691Skan	default:
242169691Skan		return -1;
243169691Skan	}
244169691Skan	return -1;
245169691Skan}
246169691Skan
247169691Skanstatic int
248169691Skanfeed_rate_init(struct pcm_feeder *f)
249169691Skan{
250169691Skan	struct feed_rate_info *info;
251169691Skan
252169691Skan	info = malloc(sizeof(*info), M_RATEFEEDER, M_NOWAIT | M_ZERO);
253169691Skan	if (info == NULL)
254169691Skan		return ENOMEM;
255169691Skan	info->src = DSP_DEFAULT_SPEED;
256169691Skan	info->dst = DSP_DEFAULT_SPEED;
257169691Skan	info->channels = 2;
258169691Skan
259169691Skan	f->data = info;
260169691Skan	return 0;
261169691Skan}
262169691Skan
263169691Skanstatic int
264169691Skanfeed_rate_free(struct pcm_feeder *f)
265169691Skan{
266169691Skan	struct feed_rate_info *info = f->data;
267169691Skan
268169691Skan	if (info) {
269169691Skan		free(info, M_RATEFEEDER);
270169691Skan	}
271169691Skan	f->data = NULL;
272169691Skan	return 0;
273169691Skan}
274169691Skan
275169691Skanstatic int
276169691Skanconvert_stereo_up(struct feed_rate_info *info,
277169691Skan		  uint32_t		 src_ticks,
278169691Skan		  uint32_t		 dst_ticks,
279169691Skan		  int16_t		*dst)
280169691Skan{
281169691Skan	uint32_t max_dst_ticks;
282169691Skan	int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o;
283169691Skan	int16_t *src;
284169691Skan
285169691Skan	sp = info->buffer_pos * 2;
286169691Skan	se = sp + src_ticks * 2;
287169691Skan
288169691Skan	src = info->buffer;
289169691Skan	alpha = info->alpha * info->mscale;
290169691Skan	dalpha = info->dscale * info->mscale; /* Alpha increment */
291169691Skan	malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */
292169691Skan	mroll = info->mroll;
293169691Skan
294169691Skan	/*
295169691Skan	 * For efficiency the main conversion loop should only depend on
296169691Skan	 * one variable.  We use the state to work out the maximum number
297169691Skan	 * of output samples that are available and eliminate the checking of
298169691Skan	 * sp from the loop.
299169691Skan	 */
300169691Skan	max_dst_ticks = src_ticks * info->dst / info->src - alpha / dalpha;
301169691Skan	if (max_dst_ticks < dst_ticks) {
302169691Skan		dst_ticks = max_dst_ticks;
303169691Skan	}
304169691Skan
305169691Skan	dp = 0;
306169691Skan	de = dst_ticks * 2;
307169691Skan	/*
308169691Skan	 * Unrolling this loop manually does not help much here because
309169691Skan	 * of the alpha, malpha comparison.
310169691Skan	 */
311169691Skan	while (dp < de) {
312169691Skan		o = malpha - alpha;
313169691Skan		x = alpha * src[sp + 2] + o * src[sp];
314169691Skan		dst[dp++] = x >> mroll;
315169691Skan		x = alpha * src[sp + 3] + o * src[sp + 1];
316169691Skan		dst[dp++] = x >> mroll;
317169691Skan		alpha += dalpha;
318169691Skan		if (alpha >= malpha) {
319169691Skan			alpha -= malpha;
320169691Skan			sp += 2;
321169691Skan		}
322169691Skan	}
323169691Skan	RATE_ASSERT(sp <= se, ("%s: Source overrun\n", __func__));
324169691Skan
325169691Skan	info->buffer_pos = sp / info->channels;
326169691Skan	info->alpha = alpha / info->mscale;
327169691Skan
328169691Skan	return dp / info->channels;
329169691Skan}
330169691Skan
331169691Skanstatic int
332169691Skanconvert_stereo_down(struct feed_rate_info *info,
333169691Skan		    uint32_t		   src_ticks,
334169691Skan		    uint32_t		   dst_ticks,
335169691Skan		    int16_t		  *dst)
336169691Skan{
337169691Skan	int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o, m,
338169691Skan		mdalpha, mstep;
339169691Skan	int16_t *src;
340169691Skan
341169691Skan	sp = info->buffer_pos * 2;
342169691Skan	se = sp + src_ticks * 2;
343169691Skan
344169691Skan	src = info->buffer;
345169691Skan	alpha = info->alpha * info->mscale;
346169691Skan	dalpha = info->dscale * info->mscale; /* Alpha increment */
347169691Skan	malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */
348169691Skan	mroll = info->mroll;
349169691Skan
350169691Skan	dp = 0;
351169691Skan	de = dst_ticks * 2;
352169691Skan
353169691Skan	m = dalpha / malpha;
354169691Skan	mstep = m * 2;
355169691Skan	mdalpha = dalpha - m * malpha;
356169691Skan
357169691Skan	/*
358169691Skan	 * TODO: eliminate sp or dp from this loop comparison for a few
359169691Skan	 * extra % performance.
360169691Skan	 */
361169691Skan	while (sp < se && dp < de) {
362169691Skan		o = malpha - alpha;
363169691Skan		x = alpha * src[sp + 2] + o * src[sp];
364169691Skan		dst[dp++] = x >> mroll;
365169691Skan		x = alpha * src[sp + 3] + o * src[sp + 1];
366169691Skan		dst[dp++] = x >> mroll;
367169691Skan
368169691Skan		alpha += mdalpha;
369169691Skan		sp += mstep;
370169691Skan		if (alpha >= malpha) {
371169691Skan			alpha -= malpha;
372169691Skan			sp += 2;
373169691Skan		}
374169691Skan	}
375169691Skan
376169691Skan	info->buffer_pos = sp / 2;
377169691Skan	info->alpha = alpha / info->mscale;
378169691Skan
379169691Skan	RATE_ASSERT(info->buffer_pos <= info->buffer_ticks,
380169691Skan		    ("%s: Source overrun\n", __func__));
381169691Skan
382169691Skan	return dp / 2;
383169691Skan}
384169691Skan
385169691Skanstatic int
386169691Skanfeed_rate(struct pcm_feeder	*f,
387169691Skan	  struct pcm_channel	*c,
388169691Skan	  uint8_t		*b,
389169691Skan	  uint32_t		 count,
390169691Skan	  void			*source)
391169691Skan{
392169691Skan	struct feed_rate_info *info = f->data;
393169691Skan
394169691Skan	uint32_t done, s_ticks, d_ticks;
395169691Skan	done = 0;
396169691Skan
397169691Skan	RATE_ASSERT(info->channels == 2,
398169691Skan		    ("%s: channels (%d) != 2", __func__, info->channels));
399169691Skan
400169691Skan	while (done < count) {
401169691Skan		/* Slurp in more data if input buffer is not full */
402169691Skan		while (info->buffer_ticks < src_ticks_per_cycle(info)) {
403169691Skan			uint8_t *u8b;
404169691Skan			int	 fetch;
405169691Skan			fetch = src_bytes_per_cycle(info) -
406169691Skan				info->buffer_ticks * bytes_per_tick(info);
407169691Skan			u8b = (uint8_t*)info->buffer +
408169691Skan				(info->buffer_ticks + 1) *
409169691Skan				bytes_per_tick(info);
410169691Skan			fetch = FEEDER_FEED(f->source, c, u8b, fetch, source);
411169691Skan			RATE_ASSERT(fetch % bytes_per_tick(info) == 0,
412169691Skan				    ("%s: fetched unaligned bytes (%d)",
413169691Skan				     __func__, fetch));
414169691Skan			info->buffer_ticks += fetch / bytes_per_tick(info);
415169691Skan			RATE_ASSERT(src_ticks_per_cycle(info) >=
416169691Skan				    info->buffer_ticks,
417169691Skan				    ("%s: buffer overfilled (%d > %d).",
418169691Skan				     __func__, info->buffer_ticks,
419169691Skan				 src_ticks_per_cycle(info)));
420169691Skan			if (fetch == 0)
421169691Skan				break;
422169691Skan		}
423169691Skan
424169691Skan		/* Find amount of input buffer data that should be processed */
425169691Skan		d_ticks = (count - done) / bytes_per_tick(info);
426169691Skan		s_ticks = info->buffer_ticks - info->buffer_pos;
427169691Skan		if (info->buffer_ticks != src_ticks_per_cycle(info)) {
428169691Skan			if (s_ticks > 8)
429169691Skan				s_ticks -= 8;
430169691Skan			else
431169691Skan				s_ticks = 0;
432169691Skan		}
433169691Skan
434169691Skan		d_ticks = info->convert(info, s_ticks, d_ticks,
435169691Skan					(int16_t*)(b + done));
436169691Skan		if (d_ticks == 0)
437169691Skan			break;
438169691Skan		done += d_ticks * bytes_per_tick(info);
439169691Skan
440169691Skan		RATE_ASSERT(info->buffer_pos <= info->buffer_ticks,
441169691Skan			    ("%s: buffer_ticks too big\n", __func__));
442169691Skan		RATE_ASSERT(info->buffer_ticks <= src_ticks_per_cycle(info),
443169691Skan			    ("too many ticks %d /  %d\n",
444169691Skan			     info->buffer_ticks, src_ticks_per_cycle(info)));
445169691Skan		RATE_TRACE("%s: ticks %5d / %d pos %d\n", __func__,
446169691Skan			   info->buffer_ticks, src_ticks_per_cycle(info),
447169691Skan			   info->buffer_pos);
448169691Skan
449169691Skan		if (src_ticks_per_cycle(info) <= info->buffer_pos) {
450169691Skan			/* End of cycle reached, copy last samples to start */
451169691Skan			uint8_t *u8b;
452169691Skan			u8b = (uint8_t*)info->buffer;
453169691Skan			bcopy(u8b + src_bytes_per_cycle(info), u8b,
454169691Skan			      bytes_per_tick(info));
455169691Skan
456169691Skan			RATE_ASSERT(info->alpha == 0,
457169691Skan				    ("%s: completed cycle with "
458169691Skan				     "alpha non-zero", __func__, info->alpha));
459169691Skan
460169691Skan			info->buffer_pos = 0;
461169691Skan			info->buffer_ticks = 0;
462169691Skan		}
463169691Skan	}
464169691Skan
465169691Skan	RATE_ASSERT(count >= done,
466169691Skan		    ("%s: generated too many bytes of data (%d > %d).",
467169691Skan		     __func__, done, count));
468169691Skan
469169691Skan	if (done != count) {
470169691Skan		RATE_TRACE("Only did %d of %d\n", done, count);
471169691Skan	}
472169691Skan
473169691Skan	return done;
474169691Skan}
475169691Skan
476169691Skanstatic struct pcm_feederdesc feeder_rate_desc[] = {
477169691Skan	{FEEDER_RATE, AFMT_S16_LE | AFMT_STEREO, AFMT_S16_LE | AFMT_STEREO, 0},
478169691Skan	{0, 0, 0, 0},
479169691Skan};
480169691Skanstatic kobj_method_t feeder_rate_methods[] = {
481169691Skan    	KOBJMETHOD(feeder_init,		feed_rate_init),
482169691Skan    	KOBJMETHOD(feeder_free,		feed_rate_free),
483169691Skan    	KOBJMETHOD(feeder_set,		feed_rate_set),
484169691Skan    	KOBJMETHOD(feeder_get,		feed_rate_get),
485169691Skan    	KOBJMETHOD(feeder_feed,		feed_rate),
486169691Skan	{0, 0}
487169691Skan};
488169691SkanFEEDER_DECLARE(feeder_rate, 2, NULL);
489169691Skan
490169691Skan