feeder_rate.c revision 110466
1283625Sdim/*
2283625Sdim * Copyright (c) 2003 Orion Hodson <orion@freebsd.org>
3283625Sdim * All rights reserved.
4283625Sdim *
5283625Sdim * Redistribution and use in source and binary forms, with or without
6283625Sdim * modification, are permitted provided that the following conditions
7283625Sdim * are met:
8283625Sdim * 1. Redistributions of source code must retain the above copyright
9283625Sdim *    notice, this list of conditions and the following disclaimer.
10283625Sdim * 2. Redistributions in binary form must reproduce the above copyright
11283625Sdim *    notice, this list of conditions and the following disclaimer in the
12283625Sdim *    documentation and/or other materials provided with the distribution.
13283625Sdim *
14283625Sdim * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15283625Sdim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16283625Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17283625Sdim * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18283625Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19283625Sdim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20283625Sdim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21283625Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22283625Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23283625Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24283625Sdim * SUCH DAMAGE.
25283625Sdim *
26283625Sdim * MAINTAINER: Orion Hodson <orion@freebsd.org>
27283625Sdim *
28283625Sdim * This rate conversion code uses linear interpolation without any
29283625Sdim * pre- or post- interpolation filtering to combat aliasing.  This
30283625Sdim * greatly limits the sound quality and should be addressed at some
31283625Sdim * stage in the future.
32283625Sdim *
33283625Sdim * Since this accuracy of interpolation is sensitive and examination
34283625Sdim * of the algorithm output is harder from the kernel, th code is
35283625Sdim * designed to be compiled in the kernel and in a userland test
36283625Sdim * harness.  This is done by selectively including and excluding code
37283625Sdim * with several portions based on whether _KERNEL is defined.  It's a
38283625Sdim * little ugly, but exceedingly useful.  The testsuite and its
39283625Sdim * revisions can be found at:
40 *		http://people.freebsd.org/~orion/feedrate/
41 *
42 * Special thanks to Ken Marx for exposing flaws in the code and for
43 * testing revisions.
44 */
45
46#ifdef _KERNEL
47
48#include <dev/sound/pcm/sound.h>
49#include "feeder_if.h"
50
51SND_DECLARE_FILE("$FreeBSD: head/sys/dev/sound/pcm/feeder_rate.c 110466 2003-02-06 17:32:02Z orion $");
52
53#endif /* _KERNEL */
54
55MALLOC_DEFINE(M_RATEFEEDER, "ratefeed", "pcm rate feeder");
56
57#ifndef RATE_ASSERT
58#define RATE_ASSERT(x, y) /* KASSERT(x) */
59#endif /* RATE_ASSERT */
60
61#ifndef RATE_TRACE
62#define RATE_TRACE(x...)  /* printf(x) */
63#endif
64
65/*****************************************************************************/
66
67/* The following coefficients are coupled.  They are chosen to be
68 * guarantee calculable factors for the interpolation routine.  They
69 * have been tested over the range of RATEMIN-RATEMAX Hz.  Decreasing
70 * the granularity increases the required buffer size and affects the
71 * gain values at different points in the space.  These values were
72 * found by running the test program with -p (probe) and some trial
73 * and error.
74 *
75 * ROUNDHZ	the granularity of sample rates (fits n*11025 and n*8000).
76 * FEEDBUFSZ	the amount of buffer space.
77 * MINGAIN	the minimum acceptable gain in coefficients search.
78 */
79#define ROUNDHZ			   25
80#define FEEDBUFSZ 		 8192
81#define MINGAIN			   92
82
83#define RATEMIN  		 4000
84#define RATEMAX 		48000
85
86struct feed_rate_info;
87
88typedef int (*rate_convert_method)(struct feed_rate_info *,
89				   uint32_t, uint32_t, int16_t *);
90
91static int
92convert_stereo_up(struct feed_rate_info *info,
93		  uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst);
94
95static int
96convert_stereo_down(struct feed_rate_info *info,
97		    uint32_t src_ticks, uint32_t dst_ticks, int16_t *dst);
98
99struct feed_rate_info {
100	uint32_t src, dst;	/* source and destination rates */
101	uint16_t buffer_ticks;	/* number of available samples in buffer */
102	uint16_t buffer_pos;	/* next available sample in buffer */
103	uint16_t rounds; 	/* maximum number of cycle rounds w buffer */
104	uint16_t alpha;		/* interpolation distance */
105        uint16_t sscale;        /* src clock scale */
106        uint16_t dscale;        /* dst clock scale */
107        uint16_t mscale;        /* scale factor to avoid divide per sample */
108        uint16_t mroll;         /* roll to again avoid divide per sample */
109	uint16_t channels;	/* 1 = mono, 2 = stereo */
110
111	rate_convert_method convert;
112    	int16_t  buffer[FEEDBUFSZ];
113};
114
115#define bytes_per_sample		2
116#define src_ticks_per_cycle(info)	(info->dscale * info->rounds)
117#define dst_ticks_per_cycle(info)	(info->sscale * info->rounds)
118#define bytes_per_tick(info)		(info->channels * bytes_per_sample)
119#define src_bytes_per_cycle(info) 					      \
120        		(src_ticks_per_cycle(info) * bytes_per_tick(info))
121#define dst_bytes_per_cycle(info) 					      \
122        		(dst_ticks_per_cycle(info) * bytes_per_tick(info))
123
124static uint32_t
125gcd(uint32_t x, uint32_t y)
126{
127	uint32_t w;
128	while (y != 0) {
129		w = x % y;
130		x = y;
131		y = w;
132	}
133	return x;
134}
135
136static int
137feed_rate_setup(struct pcm_feeder *f)
138{
139	struct feed_rate_info *info = f->data;
140        uint32_t mscale, mroll, l, r, g;
141
142	/* Beat sample rates down by greatest common divisor */
143	g = gcd(info->src, info->dst);
144	info->sscale = info->dst / g;
145	info->dscale = info->src / g;
146
147	info->alpha = 0;
148	info->buffer_ticks = 0;
149	info->buffer_pos = 0;
150
151	/* Pick suitable conversion routine */
152	if (info->src > info->dst) {
153		info->convert = convert_stereo_down;
154	} else {
155		info->convert = convert_stereo_up;
156	}
157
158	/*
159	 * Determine number of conversion rounds that will fit into
160	 * buffer.  NB Must set info->rounds to one before using
161	 * src_ticks_per_cycle here since it used by src_ticks_per_cycle.
162	 */
163	info->rounds = 1;
164	r = (FEEDBUFSZ - bytes_per_tick(info)) /
165		(src_ticks_per_cycle(info) * bytes_per_tick(info));
166	if (r == 0) {
167		RATE_TRACE("Insufficient buffer space for conversion %d -> %d "
168			   "(%d < %d)\n", info->src, info->dst, FEEDBUFSZ,
169			   src_ticks_per_cycle(info) * bytes_per_tick(info));
170		return -1;
171	}
172	info->rounds = r;
173
174	/*
175	 * Find scale and roll combination that allows us to trade
176	 * costly divide operations in the main loop for multiply-rolls.
177	 */
178        for (l = 96; l >= MINGAIN; l -= 3) {
179		for (mroll = 0; mroll < 16; mroll ++) {
180			mscale = (1 << mroll) / info->sscale;
181
182                        r = (mscale * info->sscale * 100) >> mroll;
183                        if (r > l && r <= 100) {
184                                info->mscale = mscale;
185                                info->mroll = mroll;
186                                RATE_TRACE("Converting %d to %d with "
187					   "mscale = %d and mroll = %d "
188					   "(gain = %d / 100)\n",
189					   info->src, info->dst,
190					   info->mscale, info->mroll, r);
191                                return 0;
192                        }
193                }
194        }
195
196	RATE_TRACE("Failed to find a converter within %d%% gain for "
197		   "%d to %d.\n", l, info->src, info->dst);
198
199        return -2;
200}
201
202static int
203feed_rate_set(struct pcm_feeder *f, int what, int value)
204{
205	struct feed_rate_info *info = f->data;
206	int rvalue;
207
208	if (value < RATEMIN || value > RATEMAX) {
209		return -1;
210	}
211
212	rvalue = (value / ROUNDHZ) * ROUNDHZ;
213	if (value - rvalue > ROUNDHZ / 2) {
214	    rvalue += ROUNDHZ;
215	}
216
217	switch(what) {
218	case FEEDRATE_SRC:
219		info->src = rvalue;
220		break;
221	case FEEDRATE_DST:
222		info->dst = rvalue;
223		break;
224	default:
225		return -1;
226	}
227
228	return feed_rate_setup(f);
229}
230
231static int
232feed_rate_get(struct pcm_feeder *f, int what)
233{
234	struct feed_rate_info *info = f->data;
235
236	switch(what) {
237	case FEEDRATE_SRC:
238		return info->src;
239	case FEEDRATE_DST:
240		return info->dst;
241	default:
242		return -1;
243	}
244	return -1;
245}
246
247static int
248feed_rate_init(struct pcm_feeder *f)
249{
250	struct feed_rate_info *info;
251
252	info = malloc(sizeof(*info), M_RATEFEEDER, M_NOWAIT | M_ZERO);
253	if (info == NULL)
254		return ENOMEM;
255
256	info->src = DSP_DEFAULT_SPEED;
257	info->dst = DSP_DEFAULT_SPEED;
258	info->channels = 2;
259
260	f->data = info;
261	return 0;
262}
263
264static int
265feed_rate_free(struct pcm_feeder *f)
266{
267	struct feed_rate_info *info = f->data;
268
269	if (info) {
270		free(info, M_RATEFEEDER);
271	}
272	f->data = NULL;
273	return 0;
274}
275
276static int
277convert_stereo_up(struct feed_rate_info *info,
278		  uint32_t		 src_ticks,
279		  uint32_t		 dst_ticks,
280		  int16_t		*dst)
281{
282	uint32_t max_dst_ticks;
283	int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o;
284	int16_t *src;
285
286	sp = info->buffer_pos * 2;
287	se = sp + src_ticks * 2;
288
289	src = info->buffer;
290	alpha = info->alpha * info->mscale;
291	dalpha = info->dscale * info->mscale; /* Alpha increment */
292	malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */
293	mroll = info->mroll;
294
295	/*
296	 * For efficiency the main conversion loop should only depend on
297	 * one variable.  We use the state to work out the maximum number
298	 * of output samples that are available and eliminate the checking of
299	 * sp from the loop.
300	 */
301	max_dst_ticks = src_ticks * info->dst / info->src - alpha / dalpha;
302	if (max_dst_ticks < dst_ticks) {
303		dst_ticks = max_dst_ticks;
304	}
305
306	dp = 0;
307	de = dst_ticks * 2;
308	/*
309	 * Unrolling this loop manually does not help much here because
310	 * of the alpha, malpha comparison.
311	 */
312	while (dp < de) {
313		o = malpha - alpha;
314		x = alpha * src[sp + 2] + o * src[sp];
315		dst[dp++] = x >> mroll;
316		x = alpha * src[sp + 3] + o * src[sp + 1];
317		dst[dp++] = x >> mroll;
318		alpha += dalpha;
319		if (alpha >= malpha) {
320			alpha -= malpha;
321			sp += 2;
322		}
323	}
324	RATE_ASSERT(sp <= se, ("%s: Source overrun\n", __func__));
325
326	info->buffer_pos = sp / info->channels;
327	info->alpha = alpha / info->mscale;
328
329	return dp / info->channels;
330}
331
332static int
333convert_stereo_down(struct feed_rate_info *info,
334		    uint32_t		   src_ticks,
335		    uint32_t		   dst_ticks,
336		    int16_t		  *dst)
337{
338	int32_t alpha, dalpha, malpha, mroll, sp, dp, se, de, x, o, m,
339		mdalpha, mstep;
340	int16_t *src;
341
342	sp = info->buffer_pos * 2;
343	se = sp + src_ticks * 2;
344
345	src = info->buffer;
346	alpha = info->alpha * info->mscale;
347	dalpha = info->dscale * info->mscale; /* Alpha increment */
348	malpha = info->sscale * info->mscale; /* Maximum allowed alpha value */
349	mroll = info->mroll;
350
351	dp = 0;
352	de = dst_ticks * 2;
353
354	m = dalpha / malpha;
355	mstep = m * 2;
356	mdalpha = dalpha - m * malpha;
357
358	/*
359	 * TODO: eliminate sp or dp from this loop comparison for a few
360	 * extra % performance.
361	 */
362	while (sp < se && dp < de) {
363		o = malpha - alpha;
364		x = alpha * src[sp + 2] + o * src[sp];
365		dst[dp++] = x >> mroll;
366		x = alpha * src[sp + 3] + o * src[sp + 1];
367		dst[dp++] = x >> mroll;
368
369		alpha += mdalpha;
370		sp += mstep;
371		if (alpha >= malpha) {
372			alpha -= malpha;
373			sp += 2;
374		}
375	}
376
377	info->buffer_pos = sp / 2;
378	info->alpha = alpha / info->mscale;
379
380	RATE_ASSERT(info->buffer_pos <= info->buffer_ticks,
381		    ("%s: Source overrun\n", __func__));
382
383	return dp / 2;
384}
385
386static int
387feed_rate(struct pcm_feeder	*f,
388	  struct pcm_channel	*c,
389	  uint8_t		*b,
390	  uint32_t		 count,
391	  void			*source)
392{
393	struct feed_rate_info *info = f->data;
394
395	uint32_t done, s_ticks, d_ticks;
396	done = 0;
397
398	RATE_ASSERT(info->channels == 2,
399		    ("%s: channels (%d) != 2", __func__, info->channels));
400
401	while (done < count) {
402		/* Slurp in more data if input buffer is not full */
403		while (info->buffer_ticks < src_ticks_per_cycle(info)) {
404			uint8_t *u8b;
405			int	 fetch;
406			fetch = src_bytes_per_cycle(info) -
407				info->buffer_ticks * bytes_per_tick(info);
408			u8b = (uint8_t*)info->buffer +
409				(info->buffer_ticks + 1) *
410				bytes_per_tick(info);
411			fetch = FEEDER_FEED(f->source, c, u8b, fetch, source);
412			RATE_ASSERT(fetch % bytes_per_tick(info) == 0,
413				    ("%s: fetched unaligned bytes (%d)",
414				     __func__, fetch));
415			info->buffer_ticks += fetch / bytes_per_tick(info);
416			RATE_ASSERT(src_ticks_per_cycle(info) >=
417				    info->buffer_ticks,
418				    ("%s: buffer overfilled (%d > %d).",
419				     __func__, info->buffer_ticks,
420				 src_ticks_per_cycle(info)));
421			if (fetch == 0)
422				break;
423		}
424
425		/* Find amount of input buffer data that should be processed */
426		d_ticks = (count - done) / bytes_per_tick(info);
427		s_ticks = info->buffer_ticks - info->buffer_pos;
428		if (info->buffer_ticks != src_ticks_per_cycle(info)) {
429			if (s_ticks > 8)
430				s_ticks -= 8;
431			else
432				s_ticks = 0;
433		}
434
435		d_ticks = info->convert(info, s_ticks, d_ticks,
436					(int16_t*)(b + done));
437		if (d_ticks == 0)
438			break;
439		done += d_ticks * bytes_per_tick(info);
440
441		RATE_ASSERT(info->buffer_pos <= info->buffer_ticks,
442			    ("%s: buffer_ticks too big\n", __func__));
443		RATE_ASSERT(info->buffer_ticks <= src_ticks_per_cycle(info),
444			    ("too many ticks %d /  %d\n",
445			     info->buffer_ticks, src_ticks_per_cycle(info)));
446		RATE_TRACE("%s: ticks %5d / %d pos %d\n", __func__,
447			   info->buffer_ticks, src_ticks_per_cycle(info),
448			   info->buffer_pos);
449
450		if (src_ticks_per_cycle(info) <= info->buffer_pos) {
451			/* End of cycle reached, copy last samples to start */
452			uint8_t *u8b;
453			u8b = (uint8_t*)info->buffer;
454			bcopy(u8b + src_bytes_per_cycle(info), u8b,
455			      bytes_per_tick(info));
456
457			RATE_ASSERT(info->alpha == 0,
458				    ("%s: completed cycle with "
459				     "alpha non-zero", __func__, info->alpha));
460
461			info->buffer_pos = 0;
462			info->buffer_ticks = 0;
463		}
464	}
465
466	RATE_ASSERT(count >= done,
467		    ("%s: generated too many bytes of data (%d > %d).",
468		     __func__, done, count));
469
470	if (done != count) {
471		RATE_TRACE("Only did %d of %d\n", done, count);
472	}
473
474	return done;
475}
476
477static struct pcm_feederdesc feeder_rate_desc[] = {
478	{FEEDER_RATE, AFMT_S16_LE | AFMT_STEREO, AFMT_S16_LE | AFMT_STEREO, 0},
479	{0, 0, 0, 0},
480};
481static kobj_method_t feeder_rate_methods[] = {
482    	KOBJMETHOD(feeder_init,		feed_rate_init),
483    	KOBJMETHOD(feeder_free,		feed_rate_free),
484    	KOBJMETHOD(feeder_set,		feed_rate_set),
485    	KOBJMETHOD(feeder_get,		feed_rate_get),
486    	KOBJMETHOD(feeder_feed,		feed_rate),
487	{0, 0}
488};
489FEEDER_DECLARE(feeder_rate, 2, NULL);
490
491