/**
 * \file pcm/pcm_dmix_i386.h
 * \ingroup PCM_Plugins
 * \brief PCM Direct Stream Mixing (dmix) Plugin Interface - I386 assembler code
 * \author Jaroslav Kysela <perex@perex.cz>
 * \date 2003
 */
/*
 *  PCM - Direct Stream Mixing
 *  Copyright (c) 2003 by Jaroslav Kysela <perex@perex.cz>
 *
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as
 *   published by the Free Software Foundation; either version 2.1 of
 *   the License, or (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public
 *   License along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 *
 */
28
29/*
30 *  for plain i386
31 */
32static void MIX_AREAS_16(unsigned int size,
33			 volatile signed short *dst, signed short *src,
34			 volatile signed int *sum, size_t dst_step,
35			 size_t src_step, size_t sum_step)
36{
37	unsigned int old_ebx;
38
39	/*
40	 *  ESI - src
41	 *  EDI - dst
42	 *  EBX - sum
43	 *  ECX - old sample
44	 *  EAX - sample / temporary
45	 *  EDX - temporary
46	 */
47	__asm__ __volatile__ (
48		"\n"
49
50		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
51		/*
52		 *  initialization, load ESI, EDI, EBX registers
53		 */
54		"\tmovl %1, %%edi\n"
55		"\tmovl %2, %%esi\n"
56		"\tmovl %3, %%ebx\n"
57		"\tcmpl $0, %0\n"
58		"\tjnz 2f\n"
59		"\tjmp 7f\n"
60
61
62		/*
63		 * for (;;)
64		 */
65		"\t.p2align 4,,15\n"
66		"1:"
67		"\tadd %4, %%edi\n"
68		"\tadd %5, %%esi\n"
69		"\tadd %6, %%ebx\n"
70
71		/*
72		 *   sample = *src;
73		 *   sum_sample = *sum;
74		 *   if (cmpxchg(*dst, 0, 1) == 0)
75		 *     sample -= sum_sample;
76		 *   xadd(*sum, sample);
77		 */
78
79		"2:"
80		"\tmovw $0, %%ax\n"
81		"\tmovw $1, %%cx\n"
82		"\tmovl (%%ebx), %%edx\n"
83		"\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
84		"\tmovswl (%%esi), %%ecx\n"
85		"\tjnz 3f\n"
86		"\t" XSUB " %%edx, %%ecx\n"
87		"3:"
88		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
89
90		/*
91		 *   do {
92		 *     sample = old_sample = *sum;
93		 *     saturate(v);
94		 *     *dst = sample;
95		 *   } while (v != *sum);
96		 */
97
98		"4:"
99		"\tmovl (%%ebx), %%ecx\n"
100		"\tcmpl $0x7fff,%%ecx\n"
101		"\tjg 5f\n"
102		"\tcmpl $-0x8000,%%ecx\n"
103		"\tjl 6f\n"
104		"\tmovw %%cx, (%%edi)\n"
105		"\tcmpl %%ecx, (%%ebx)\n"
106		"\tjnz 4b\n"
107
108		/*
109		 * while (size-- > 0)
110		 */
111		"\tdecl %0\n"
112		"\tjnz 1b\n"
113		"\tjmp 7f\n"
114
115		/*
116		 *  sample > 0x7fff
117		 */
118
119		"\t.p2align 4,,15\n"
120
121		"5:"
122		"\tmovw $0x7fff, (%%edi)\n"
123		"\tcmpl %%ecx,(%%ebx)\n"
124		"\tjnz 4b\n"
125		"\tdecl %0\n"
126		"\tjnz 1b\n"
127		"\tjmp 7f\n"
128
129		/*
130		 *  sample < -0x8000
131		 */
132
133		"\t.p2align 4,,15\n"
134
135		"6:"
136		"\tmovw $-0x8000, (%%edi)\n"
137		"\tcmpl %%ecx, (%%ebx)\n"
138		"\tjnz 4b\n"
139		"\tdecl %0\n"
140		"\tjnz 1b\n"
141
142		"7:"
143		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
144
145		: /* no output regs */
146		: "m" (size), "m" (dst), "m" (src),
147		  "m" (sum), "m" (dst_step), "m" (src_step),
148		  "m" (sum_step), "m" (old_ebx)
149		: "esi", "edi", "edx", "ecx", "eax"
150	);
151}
152
153/*
154 *  MMX optimized
155 */
156static void MIX_AREAS_16_MMX(unsigned int size,
157			     volatile signed short *dst, signed short *src,
158			     volatile signed int *sum, size_t dst_step,
159			     size_t src_step, size_t sum_step)
160{
161	unsigned int old_ebx;
162
163	/*
164	 *  ESI - src
165	 *  EDI - dst
166	 *  EBX - sum
167	 *  ECX - old sample
168	 *  EAX - sample / temporary
169	 *  EDX - temporary
170	 */
171	__asm__ __volatile__ (
172		"\n"
173
174		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
175		/*
176		 *  initialization, load ESI, EDI, EBX registers
177		 */
178		"\tmovl %1, %%edi\n"
179		"\tmovl %2, %%esi\n"
180		"\tmovl %3, %%ebx\n"
181		"\tcmpl $0, %0\n"
182		"\tjnz 2f\n"
183		"\tjmp 5f\n"
184
185		"\t.p2align 4,,15\n"
186		"1:"
187		"\tadd %4, %%edi\n"
188		"\tadd %5, %%esi\n"
189		"\tadd %6, %%ebx\n"
190
191		"2:"
192		/*
193		 *   sample = *src;
194		 *   sum_sample = *sum;
195		 *   if (cmpxchg(*dst, 0, 1) == 0)
196		 *     sample -= sum_sample;
197		 *   xadd(*sum, sample);
198		 */
199		"\tmovw $0, %%ax\n"
200		"\tmovw $1, %%cx\n"
201		"\tmovl (%%ebx), %%edx\n"
202		"\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
203		"\tmovswl (%%esi), %%ecx\n"
204		"\tjnz 3f\n"
205		"\t" XSUB " %%edx, %%ecx\n"
206		"3:"
207		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
208
209		/*
210		 *   do {
211		 *     sample = old_sample = *sum;
212		 *     saturate(v);
213		 *     *dst = sample;
214		 *   } while (v != *sum);
215		 */
216
217		"4:"
218		"\tmovl (%%ebx), %%ecx\n"
219		"\tmovd %%ecx, %%mm0\n"
220		"\tpackssdw %%mm1, %%mm0\n"
221		"\tmovd %%mm0, %%eax\n"
222		"\tmovw %%ax, (%%edi)\n"
223		"\tcmpl %%ecx, (%%ebx)\n"
224		"\tjnz 4b\n"
225
226		/*
227		 * while (size-- > 0)
228		 */
229		"\tdecl %0\n"
230		"\tjnz 1b\n"
231		"\temms\n"
232                "5:"
233		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
234
235		: /* no output regs */
236		: "m" (size), "m" (dst), "m" (src),
237		  "m" (sum), "m" (dst_step), "m" (src_step),
238		  "m" (sum_step), "m" (old_ebx)
239		: "esi", "edi", "edx", "ecx", "eax"
240	);
241}
242
243/*
244 *  for plain i386, 32-bit version (24-bit resolution)
245 */
246static void MIX_AREAS_32(unsigned int size,
247			 volatile signed int *dst, signed int *src,
248			 volatile signed int *sum, size_t dst_step,
249			 size_t src_step, size_t sum_step)
250{
251	unsigned int old_ebx;
252
253	/*
254	 *  ESI - src
255	 *  EDI - dst
256	 *  EBX - sum
257	 *  ECX - old sample
258	 *  EAX - sample / temporary
259	 *  EDX - temporary
260	 */
261	__asm__ __volatile__ (
262		"\n"
263
264		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
265		/*
266		 *  initialization, load ESI, EDI, EBX registers
267		 */
268		"\tmovl %1, %%edi\n"
269		"\tmovl %2, %%esi\n"
270		"\tmovl %3, %%ebx\n"
271		"\tcmpl $0, %0\n"
272		"\tjnz 1f\n"
273		"\tjmp 6f\n"
274
275		"\t.p2align 4,,15\n"
276
277		"1:"
278
279		/*
280		 *   sample = *src;
281		 *   sum_sample = *sum;
282		 *   if (cmpxchg(*dst, 0, 1) == 0)
283		 *     sample -= sum_sample;
284		 *   xadd(*sum, sample);
285		 */
286		"\tmovl $0, %%eax\n"
287		"\tmovl $1, %%ecx\n"
288		"\tmovl (%%ebx), %%edx\n"
289		"\t" LOCK_PREFIX "cmpxchgl %%ecx, (%%edi)\n"
290		"\tjnz 2f\n"
291		"\tmovl (%%esi), %%ecx\n"
292		/* sample >>= 8 */
293		"\tsarl $8, %%ecx\n"
294		"\t" XSUB " %%edx, %%ecx\n"
295		"\tjmp 21f\n"
296		"2:"
297		"\tmovl (%%esi), %%ecx\n"
298		/* sample >>= 8 */
299		"\tsarl $8, %%ecx\n"
300		"21:"
301		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
302
303		/*
304		 *   do {
305		 *     sample = old_sample = *sum;
306		 *     saturate(v);
307		 *     *dst = sample;
308		 *   } while (v != *sum);
309		 */
310
311		"3:"
312		"\tmovl (%%ebx), %%ecx\n"
313		/*
314		 *  if (sample > 0x7fff00)
315		 */
316		"\tmovl $0x7fffff, %%eax\n"
317		"\tcmpl %%eax, %%ecx\n"
318		"\tjg 4f\n"
319		/*
320		 *  if (sample < -0x800000)
321		 */
322		"\tmovl $-0x800000, %%eax\n"
323		"\tcmpl %%eax, %%ecx\n"
324		"\tjl 4f\n"
325		"\tmovl %%ecx, %%eax\n"
326		"4:"
327		/*
328		 *  sample <<= 8;
329		 */
330		"\tsall $8, %%eax\n"
331		"\tmovl %%eax, (%%edi)\n"
332		"\tcmpl %%ecx, (%%ebx)\n"
333		"\tjnz 3b\n"
334
335		/*
336		 * while (size-- > 0)
337		 */
338		"\tdecl %0\n"
339		"\tjz 6f\n"
340		"\tadd %4, %%edi\n"
341		"\tadd %5, %%esi\n"
342		"\tadd %6, %%ebx\n"
343		"\tjmp 1b\n"
344
345		"6:"
346		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
347
348		: /* no output regs */
349		: "m" (size), "m" (dst), "m" (src),
350		  "m" (sum), "m" (dst_step), "m" (src_step),
351		  "m" (sum_step), "m" (old_ebx)
352		: "esi", "edi", "edx", "ecx", "eax"
353	);
354}
355
356/*
357 * 24-bit version for plain i386
358 */
359static void MIX_AREAS_24(unsigned int size,
360			 volatile unsigned char *dst, unsigned char *src,
361			 volatile signed int *sum, size_t dst_step,
362			 size_t src_step, size_t sum_step)
363{
364	unsigned int old_ebx;
365
366	/*
367	 *  ESI - src
368	 *  EDI - dst
369	 *  EBX - sum
370	 *  ECX - old sample
371	 *  EAX - sample / temporary
372	 *  EDX - temporary
373	 */
374	__asm__ __volatile__ (
375		"\n"
376
377		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
378		/*
379		 *  initialization, load ESI, EDI, EBX registers
380		 */
381		"\tmovl %1, %%edi\n"
382		"\tmovl %2, %%esi\n"
383		"\tmovl %3, %%ebx\n"
384		"\tcmpl $0, %0\n"
385		"\tjnz 1f\n"
386		"\tjmp 6f\n"
387
388		"\t.p2align 4,,15\n"
389
390		"1:"
391
392		/*
393		 *   sample = *src;
394		 *   sum_sample = *sum;
395		 *   if (test_and_set_bit(0, dst) == 0)
396		 *     sample -= sum_sample;
397		 *   *sum += sample;
398		 */
399		"\tmovsbl 2(%%esi), %%eax\n"
400		"\tmovzwl (%%esi), %%ecx\n"
401		"\tmovl (%%ebx), %%edx\n"
402		"\tsall $16, %%eax\n"
403		"\torl %%eax, %%ecx\n"
404		"\t" LOCK_PREFIX "btsw $0, (%%edi)\n"
405		"\tjc 2f\n"
406		"\t" XSUB " %%edx, %%ecx\n"
407		"2:"
408		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
409
410		/*
411		 *   do {
412		 *     sample = old_sample = *sum;
413		 *     saturate(sample);
414		 *     *dst = sample | 1;
415		 *   } while (old_sample != *sum);
416		 */
417
418		"3:"
419		"\tmovl (%%ebx), %%ecx\n"
420		/*
421		 *  if (sample > 0x7fffff)
422		 */
423		"\tmovl $0x7fffff, %%eax\n"
424		"\tcmpl %%eax, %%ecx\n"
425		"\tjg 4f\n"
426		/*
427		 *  if (sample < -0x7fffff)
428		 */
429		"\tmovl $-0x7fffff, %%eax\n"
430		"\tcmpl %%eax, %%ecx\n"
431		"\tjl 4f\n"
432		"\tmovl %%ecx, %%eax\n"
433		"\torl $1, %%eax\n"
434		"4:"
435		"\tmovw %%ax, (%%edi)\n"
436		"\tshrl $16, %%eax\n"
437		"\tmovb %%al, 2(%%edi)\n"
438		"\tcmpl %%ecx, (%%ebx)\n"
439		"\tjnz 3b\n"
440
441		/*
442		 * while (size-- > 0)
443		 */
444		"\tdecl %0\n"
445		"\tjz 6f\n"
446		"\tadd %4, %%edi\n"
447		"\tadd %5, %%esi\n"
448		"\tadd %6, %%ebx\n"
449		"\tjmp 1b\n"
450
451		"6:"
452		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
453
454		: /* no output regs */
455		: "m" (size), "m" (dst), "m" (src),
456		  "m" (sum), "m" (dst_step), "m" (src_step),
457		  "m" (sum_step), "m" (old_ebx)
458		: "esi", "edi", "edx", "ecx", "eax"
459	);
460}
461
462/*
463 * 24-bit version for Pentium Pro/II
464 */
465static void MIX_AREAS_24_CMOV(unsigned int size,
466			      volatile unsigned char *dst, unsigned char *src,
467			      volatile signed int *sum, size_t dst_step,
468			      size_t src_step, size_t sum_step)
469{
470	unsigned int old_ebx;
471
472	/*
473	 *  ESI - src
474	 *  EDI - dst
475	 *  EBX - sum
476	 *  ECX - old sample
477	 *  EAX - sample / temporary
478	 *  EDX - temporary
479	 */
480	__asm__ __volatile__ (
481		"\n"
482
483		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
484		/*
485		 *  initialization, load ESI, EDI, EBX registers
486		 */
487		"\tmovl %1, %%edi\n"
488		"\tmovl %2, %%esi\n"
489		"\tmovl %3, %%ebx\n"
490		"\tcmpl $0, %0\n"
491		"\tjz 6f\n"
492
493		"\t.p2align 4,,15\n"
494
495		"1:"
496
497		/*
498		 *   sample = *src;
499		 *   sum_sample = *sum;
500		 *   if (test_and_set_bit(0, dst) == 0)
501		 *     sample -= sum_sample;
502		 *   *sum += sample;
503		 */
504		"\tmovsbl 2(%%esi), %%eax\n"
505		"\tmovzwl (%%esi), %%ecx\n"
506		"\tmovl (%%ebx), %%edx\n"
507		"\tsall $16, %%eax\n"
508		"\t" LOCK_PREFIX "btsw $0, (%%edi)\n"
509		"\tleal (%%ecx,%%eax,1), %%ecx\n"
510		"\tjc 2f\n"
511		"\t" XSUB " %%edx, %%ecx\n"
512		"2:"
513		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
514
515		/*
516		 *   do {
517		 *     sample = old_sample = *sum;
518		 *     saturate(sample);
519		 *     *dst = sample | 1;
520		 *   } while (old_sample != *sum);
521		 */
522
523		"3:"
524		"\tmovl (%%ebx), %%ecx\n"
525
526		"\tmovl $0x7fffff, %%eax\n"
527		"\tmovl $-0x7fffff, %%edx\n"
528		"\tcmpl %%eax, %%ecx\n"
529		"\tcmovng %%ecx, %%eax\n"
530		"\tcmpl %%edx, %%ecx\n"
531		"\tcmovl %%edx, %%eax\n"
532
533		"\torl $1, %%eax\n"
534		"\tmovw %%ax, (%%edi)\n"
535		"\tshrl $16, %%eax\n"
536		"\tmovb %%al, 2(%%edi)\n"
537
538		"\tcmpl %%ecx, (%%ebx)\n"
539		"\tjnz 3b\n"
540
541		/*
542		 * while (size-- > 0)
543		 */
544		"\tadd %4, %%edi\n"
545		"\tadd %5, %%esi\n"
546		"\tadd %6, %%ebx\n"
547		"\tdecl %0\n"
548		"\tjnz 1b\n"
549
550		"6:"
551		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
552
553		: /* no output regs */
554		: "m" (size), "m" (dst), "m" (src),
555		  "m" (sum), "m" (dst_step), "m" (src_step),
556		  "m" (sum_step), "m" (old_ebx)
557		: "esi", "edi", "edx", "ecx", "eax"
558	);
559}
560