feeder_rate.c revision 110108
1/*
2 * Copyright (c) 2003 Orion Hodson <orion@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * MAINTAINER: Orion Hodson <orion@freebsd.org>
27 *
28 * This rate conversion code uses linear interpolation without any
29 * pre- or post- interpolation filtering to combat aliasing.  This
30 * greatly limits the sound quality and should be addressed at some
31 * stage in the future.
32 *
33 * Since this accuracy of interpolation is sensitive and examination
34 * of the algorithm output is harder from the kernel, th code is
35 * designed to be compiled in the kernel and in a userland test
36 * harness.  This is done by selectively including and excluding code
37 * with several portions based on whether _KERNEL is defined.  It's a
38 * little ugly, but exceedingly useful.  The testsuite and its
39 * revisions can be found at:
40 *		http://people.freebsd.org/~orion/feedrate/
41 *
42 * Special thanks to Ken Marx for exposing flaws in the code and for
43 * testing revisions.
44 */
45
46#ifdef _KERNEL
47
48#include <dev/sound/pcm/sound.h>
49#include "feeder_if.h"
50
51SND_DECLARE_FILE("$FreeBSD: head/sys/dev/sound/pcm/feeder_rate.c 110108 2003-01-30 16:32:56Z orion $");
52MALLOC_DEFINE(M_RATEFEEDER, "ratefeed", "pcm rate feeder");
53
54#define RATE_ASSERT(x, y) /* KASSERT(x,y) */
55#define RATE_TRACE(x...) /* printf(x) */
56
57#endif /* _KERNEL */
58
59/*****************************************************************************/
60/* All of the following coefficients are coupled.  They are chosen to be
61 * good in the operating space 4000-96000kHz work.  Decreasing the
62 * granularity increases the required buffer size and affects the gain
63 * values at different points in the space.  These values were found by
64 * running the test program with -p (probe) and some trial and error.
65 *
66 * ROUNDHZ	the granularity of sample rates (fits n*11025 and n*8000).
67 * FEEDBUFSZ	the amount of buffer space.
68 * MINGAIN	the minimum acceptable gain in coefficients search.
69 */
70#define ROUNDHZ			   25
71#define FEEDBUFSZ		 8192
72#define MINGAIN			   92
73
74#define RATEMIN 		 4000
75#define RATEMAX			96000
76
77struct feed_rate_info;
78
79typedef int (*rate_convert_method)(struct feed_rate_info *,
80				   uint32_t, uint32_t, int16_t *);
81
82static int
83convert_stereo_up(struct feed_rate_info *info,
84		  uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst);
85
86static int
87convert_stereo_down(struct feed_rate_info *info,
88		    uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst);
89
90struct feed_rate_info {
91	uint32_t src, dst;	/* source and destination rates */
92	uint16_t buffer_ticks;	/* number of available samples in buffer */
93	uint16_t buffer_pos;	/* next available sample in buffer */
94	uint16_t rounds; 	/* maximum number of cycle rounds w buffer */
95	uint16_t alpha;		/* interpolation distance */
96        uint16_t sscale;        /* src clock scale */
97        uint16_t dscale;        /* dst clock scale */
98        uint16_t mscale;        /* scale factor to avoid divide per sample */
99        uint16_t mroll;         /* roll to again avoid divide per sample */
100	uint16_t channels;	/* 1 = mono, 2 = stereo */
101
102	rate_convert_method convert;
103    	int16_t  buffer[FEEDBUFSZ];
104};
105
106#define bytes_per_sample		2
107#define src_ticks_per_cycle(info)	(info->dscale * info->rounds)
108#define dst_ticks_per_cycle(info)	(info->sscale * info->rounds)
109#define bytes_per_tick(info)		(info->channels * bytes_per_sample)
110#define src_bytes_per_cycle(info) 					      \
111        		(src_ticks_per_cycle(info) * bytes_per_tick(info))
112#define dst_bytes_per_cycle(info) 					      \
113        		(dst_ticks_per_cycle(info) * bytes_per_tick(info))
114
115static uint32_t
116gcd(uint32_t x, uint32_t y)
117{
118	uint32_t w;
119	while (y != 0) {
120		w = x % y;
121		x = y;
122		y = w;
123	}
124	return x;
125}
126
127static int
128feed_rate_setup(struct pcm_feeder *f)
129{
130	struct feed_rate_info *info = f->data;
131        uint32_t mscale, mroll, l, r, g;
132
133	/* Beat sample rates down by greatest common divisor */
134	g = gcd(info->src, info->dst);
135	info->sscale = info->dst / g;
136	info->dscale = info->src / g;
137
138	info->alpha = 0;
139	info->buffer_ticks = 0;
140	info->buffer_pos = 0;
141
142	/* Pick suitable conversion routine */
143	if (info->src > info->dst) {
144		info->convert = convert_stereo_down;
145	} else {
146		info->convert = convert_stereo_up;
147	}
148
149	/*
150	 * Determine number of conversion rounds that will fit into
151	 * buffer.  NB Must set info->rounds to one before using
152	 * src_ticks_per_cycle here since it used by src_ticks_per_cycle.
153	 */
154	info->rounds = 1;
155	r = (FEEDBUFSZ - bytes_per_tick(info)) /
156		(src_ticks_per_cycle(info) * bytes_per_tick(info));
157	if (r == 0) {
158		RATE_TRACE("Insufficient buffer space for conversion %d -> %d "
159			   "(%d < %d)\n", info->src, info->dst, FEEDBUFSZ,
160			   src_ticks_per_cycle(info) * bytes_per_tick(info));
161		return -1;
162	}
163	info->rounds = r;
164
165	/*
166	 * Find scale and roll combination that allows us to trade
167	 * costly divide operations in the main loop for multiply-rolls.
168	 */
169        for (l = 96; l >= MINGAIN; l -= 3) {
170		for (mroll = 0; mroll < 16; mroll ++) {
171			mscale = (1 << mroll) / info->sscale;
172
173                        r = (mscale * info->sscale * 100) >> mroll;
174                        if (r > l && r <= 100) {
175                                info->mscale = mscale;
176                                info->mroll = mroll;
177                                RATE_TRACE("Converting %d to %d with "
178					   "mscale = %d and mroll = %d "
179					   "(gain = %d / 100)\n",
180					   info->src, info->dst,
181					   info->mscale, info->mroll, r);
182                                return 0;
183                        }
184                }
185        }
186
187	RATE_TRACE("Failed to find a converter within %d%% gain for "
188		   "%d to %d.\n", l, info->src, info->dst);
189
190        return -2;
191}
192
193static int
194feed_rate_set(struct pcm_feeder *f, int what, int value)
195{
196	struct feed_rate_info *info = f->data;
197	int rvalue;
198
199	if (value < RATEMIN || value > RATEMAX) {
200		return -1;
201	}
202
203	rvalue = (value / ROUNDHZ) * ROUNDHZ;
204	if (value - rvalue > ROUNDHZ / 2) {
205	    rvalue += ROUNDHZ;
206	}
207
208	switch(what) {
209	case FEEDRATE_SRC:
210		info->src = rvalue;
211		break;
212	case FEEDRATE_DST:
213		info->dst = rvalue;
214		break;
215	default:
216		return -1;
217	}
218
219	return feed_rate_setup(f);
220}
221
222static int
223feed_rate_get(struct pcm_feeder *f, int what)
224{
225	struct feed_rate_info *info = f->data;
226
227	switch(what) {
228	case FEEDRATE_SRC:
229		return info->src;
230	case FEEDRATE_DST:
231		return info->dst;
232	default:
233		return -1;
234	}
235	return -1;
236}
237
238static int
239feed_rate_init(struct pcm_feeder *f)
240{
241	struct feed_rate_info *info;
242
243	info = malloc(sizeof(*info), M_RATEFEEDER, M_NOWAIT | M_ZERO);
244	if (info == NULL)
245		return ENOMEM;
246
247	info->src = DSP_DEFAULT_SPEED;
248	info->dst = DSP_DEFAULT_SPEED;
249	info->channels = 2;
250
251	f->data = info;
252	return 0;
253}
254
255static int
256feed_rate_free(struct pcm_feeder *f)
257{
258	struct feed_rate_info *info = f->data;
259
260	if (info) {
261		free(info, M_RATEFEEDER);
262	}
263	f->data = NULL;
264	return 0;
265}
266
267static int
268convert_stereo_up(struct feed_rate_info *info,
269		  uint32_t		 src_ticks,
270		  uint32_t		 dst_ticks,
271		  int16_t		*dst)
272{
273	uint32_t max_dst_ticks;
274	int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o;
275	int16_t *src;
276
277	sp = info->buffer_pos * 2;
278	se = sp + src_ticks * 2;
279
280	src = info->buffer;
281	alpha = info->alpha * info->mscale;
282	dalpha = info->dscale * info->mscale; /* Alpha increment */
283	malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */
284	mroll = info->mroll;
285
286	/*
287	 * For efficiency the main conversion loop should only depend on
288	 * one variable.  We use the state to work out the maximum number
289	 * of output samples that are available and eliminate the checking of
290	 * sp from the loop.
291	 */
292	max_dst_ticks = src_ticks * info->dst / info->src - alpha / dalpha;
293	if (max_dst_ticks < dst_ticks) {
294		dst_ticks = max_dst_ticks;
295	}
296
297	dp = 0;
298	de = dst_ticks * 2;
299	/*
300	 * Unrolling this loop manually does not help much here because
301	 * of the alpha, malpha comparison.
302	 */
303	while (dp < de) {
304		o = malpha - alpha;
305		x = alpha * src[sp + 2] + o * src[sp];
306		dst[dp++] = x >> mroll;
307		x = alpha * src[sp + 3] + o * src[sp + 1];
308		dst[dp++] = x >> mroll;
309		alpha += dalpha;
310		if (alpha >= malpha) {
311			alpha -= malpha;
312			sp += 2;
313		}
314	}
315	RATE_ASSERT(sp <= se, ("%s: Source overrun\n", __func__));
316
317	info->buffer_pos = sp / info->channels;
318	info->alpha = alpha / info->mscale;
319
320	return dp / info->channels;
321}
322
323static int
324convert_stereo_down(struct feed_rate_info *info,
325		    uint32_t		   src_ticks,
326		    uint32_t		   dst_ticks,
327		    int16_t		  *dst)
328{
329	int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o, m,
330		mdalpha, mstep;
331	int16_t *src;
332
333	sp = info->buffer_pos * 2;
334	se = sp + src_ticks * 2;
335
336	src = info->buffer;
337	alpha = info->alpha * info->mscale;
338	dalpha = info->dscale * info->mscale; /* Alpha increment */
339	malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */
340	mroll = info->mroll;
341
342	dp = 0;
343	de = dst_ticks * 2;
344
345	m = dalpha / malpha;
346	mstep = m * 2;
347	mdalpha = dalpha - m * malpha;
348
349	/*
350	 * TODO: eliminate sp or dp from this loop comparison for a few
351	 * extra % performance.
352	 */
353	while (sp < se && dp < de) {
354		o = malpha - alpha;
355		x = alpha * src[sp + 2] + o * src[sp];
356		dst[dp++] = x >> mroll;
357		x = alpha * src[sp + 3] + o * src[sp + 1];
358		dst[dp++] = x >> mroll;
359
360		alpha += mdalpha;
361		sp += mstep;
362		if (alpha >= malpha) {
363			alpha -= malpha;
364			sp += 2;
365		}
366	}
367
368	info->buffer_pos = sp / 2;
369	info->alpha = alpha / info->mscale;
370
371	RATE_ASSERT(info->buffer_pos <= info->buffer_ticks,
372		    ("%s: Source overrun\n", __func__));
373
374	return dp / 2;
375}
376
377static int
378feed_rate(struct pcm_feeder	*f,
379	  struct pcm_channel	*c,
380	  uint8_t		*b,
381	  uint32_t		 count,
382	  void			*source)
383{
384	struct feed_rate_info *info = f->data;
385
386	uint32_t done, s_ticks, d_ticks;
387	done = 0;
388
389	RATE_ASSERT(info->channels == 2,
390		    ("%s: channels (%d) != 2", __func__, info->channels));
391
392	while (done < count) {
393		/* Slurp in more data if input buffer is not full */
394		while (info->buffer_ticks < src_ticks_per_cycle(info)) {
395			uint8_t *u8b;
396			int	 fetch;
397			fetch = src_bytes_per_cycle(info) -
398				info->buffer_ticks * bytes_per_tick(info);
399			u8b = (uint8_t*)info->buffer +
400				(info->buffer_ticks + 1) *
401				bytes_per_tick(info);
402			fetch = FEEDER_FEED(f->source, c, u8b, fetch, source);
403			RATE_ASSERT(fetch % bytes_per_tick(info) == 0,
404				    ("%s: fetched unaligned bytes (%d)",
405				     __func__, fetch));
406			info->buffer_ticks += fetch / bytes_per_tick(info);
407			RATE_ASSERT(src_ticks_per_cycle(info) >=
408				    info->buffer_ticks,
409				    ("%s: buffer overfilled (%d > %d).",
410				     __func__, info->buffer_ticks,
411				 src_ticks_per_cycle(info)));
412			if (fetch == 0)
413				break;
414		}
415
416		/* Find amount of input buffer data that should be processed */
417		d_ticks = (count - done) / bytes_per_tick(info);
418		s_ticks = info->buffer_ticks - info->buffer_pos;
419		if (info->buffer_ticks != src_ticks_per_cycle(info)) {
420			if (s_ticks > 8)
421				s_ticks -= 8;
422			else
423				s_ticks = 0;
424		}
425
426		d_ticks = info->convert(info, s_ticks, d_ticks,
427					(int16_t*)(b + done));
428		if (d_ticks == 0)
429			break;
430		done += d_ticks * bytes_per_tick(info);
431
432		RATE_ASSERT(info->buffer_pos <= info->buffer_ticks,
433			    ("%s: buffer_ticks too big\n", __func__));
434		RATE_TRACE("%s: ticks %5d pos %d\n", __func__,
435			   info->buffer_ticks, info->buffer_pos);
436
437		if (src_ticks_per_cycle(info) <= info->buffer_pos) {
438			/* End of cycle reached, copy last samples to start */
439			uint8_t *u8b;
440			u8b = (uint8_t*)info->buffer;
441			bcopy(u8b + src_bytes_per_cycle(info), u8b,
442			      bytes_per_tick(info));
443
444			RATE_ASSERT(info->alpha == 0,
445				    ("%s: completed cycle with "
446				     "alpha non-zero", __func__, info->alpha));
447
448			info->buffer_pos = 0;
449			info->buffer_ticks = 0;
450		}
451	}
452
453	RATE_ASSERT(count >= done,
454		    ("%s: generated too many bytes of data (%d > %d).",
455		     __func__, done, count));
456
457	if (done != count) {
458		RATE_TRACE("Only did %d of %d\n", done, count);
459	}
460
461	return done;
462}
463
464#ifdef _KERNEL
465
466static struct pcm_feederdesc feeder_rate_desc[] = {
467	{FEEDER_RATE, AFMT_S16_LE | AFMT_STEREO, AFMT_S16_LE | AFMT_STEREO, 0},
468	{0},
469};
470static kobj_method_t feeder_rate_methods[] = {
471    	KOBJMETHOD(feeder_init,		feed_rate_init),
472    	KOBJMETHOD(feeder_free,		feed_rate_free),
473    	KOBJMETHOD(feeder_set,		feed_rate_set),
474    	KOBJMETHOD(feeder_get,		feed_rate_get),
475    	KOBJMETHOD(feeder_feed,		feed_rate),
476	{ 0, 0 }
477};
478FEEDER_DECLARE(feeder_rate, 2, NULL);
479
480#endif /* _KERNEL */
481