1/** 2 * \file pcm/pcm_dmix_i386.h 3 * \ingroup PCM_Plugins 4 * \brief PCM Direct Stream Mixing (dmix) Plugin Interface - I386 assembler code 5 * \author Jaroslav Kysela <perex@perex.cz> 6 * \date 2003 7 */ 8/* 9 * PCM - Direct Stream Mixing 10 * Copyright (c) 2003 by Jaroslav Kysela <perex@perex.cz> 11 * 12 * 13 * This library is free software; you can redistribute it and/or modify 14 * it under the terms of the GNU Lesser General Public License as 15 * published by the Free Software Foundation; either version 2.1 of 16 * the License, or (at your option) any later version. 17 * 18 * This program is distributed in the hope that it will be useful, 19 * but WITHOUT ANY WARRANTY; without even the implied warranty of 20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 * GNU Lesser General Public License for more details. 22 * 23 * You should have received a copy of the GNU Lesser General Public 24 * License along with this library; if not, write to the Free Software 25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 26 * 27 */ 28 29/* 30 * for plain i386 31 */ 32static void MIX_AREAS_16(unsigned int size, 33 volatile signed short *dst, signed short *src, 34 volatile signed int *sum, size_t dst_step, 35 size_t src_step, size_t sum_step) 36{ 37 unsigned int old_ebx; 38 39 /* 40 * ESI - src 41 * EDI - dst 42 * EBX - sum 43 * ECX - old sample 44 * EAX - sample / temporary 45 * EDX - temporary 46 */ 47 __asm__ __volatile__ ( 48 "\n" 49 50 "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 51 /* 52 * initialization, load ESI, EDI, EBX registers 53 */ 54 "\tmovl %1, %%edi\n" 55 "\tmovl %2, %%esi\n" 56 "\tmovl %3, %%ebx\n" 57 "\tcmpl $0, %0\n" 58 "\tjnz 2f\n" 59 "\tjmp 7f\n" 60 61 62 /* 63 * for (;;) 64 */ 65 "\t.p2align 4,,15\n" 66 "1:" 67 "\tadd %4, %%edi\n" 68 "\tadd %5, %%esi\n" 69 "\tadd %6, %%ebx\n" 70 71 /* 72 * sample = *src; 73 * sum_sample = *sum; 74 * if (cmpxchg(*dst, 0, 1) == 0) 75 * sample -= sum_sample; 76 * xadd(*sum, sample); 77 */ 78 79 "2:" 80 "\tmovw $0, %%ax\n" 81 "\tmovw $1, %%cx\n" 82 "\tmovl (%%ebx), %%edx\n" 83 "\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n" 84 "\tmovswl (%%esi), %%ecx\n" 85 "\tjnz 3f\n" 86 "\t" XSUB " %%edx, %%ecx\n" 87 "3:" 88 "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n" 89 90 /* 91 * do { 92 * sample = old_sample = *sum; 93 * saturate(v); 94 * *dst = sample; 95 * } while (v != *sum); 96 */ 97 98 "4:" 99 "\tmovl (%%ebx), %%ecx\n" 100 "\tcmpl $0x7fff,%%ecx\n" 101 "\tjg 5f\n" 102 "\tcmpl $-0x8000,%%ecx\n" 103 "\tjl 6f\n" 104 "\tmovw %%cx, (%%edi)\n" 105 "\tcmpl %%ecx, (%%ebx)\n" 106 "\tjnz 4b\n" 107 108 /* 109 * while (size-- > 0) 110 */ 111 "\tdecl %0\n" 112 "\tjnz 1b\n" 113 "\tjmp 7f\n" 114 115 /* 116 * sample > 0x7fff 117 */ 118 119 "\t.p2align 4,,15\n" 120 121 "5:" 122 "\tmovw $0x7fff, (%%edi)\n" 123 "\tcmpl %%ecx,(%%ebx)\n" 124 "\tjnz 4b\n" 125 "\tdecl %0\n" 126 "\tjnz 1b\n" 127 "\tjmp 7f\n" 128 129 /* 130 * sample < -0x8000 131 */ 132 133 "\t.p2align 4,,15\n" 134 135 "6:" 136 "\tmovw $-0x8000, (%%edi)\n" 137 "\tcmpl %%ecx, (%%ebx)\n" 138 "\tjnz 4b\n" 139 "\tdecl %0\n" 140 "\tjnz 1b\n" 141 142 "7:" 143 "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 144 145 : /* no output regs */ 146 : "m" (size), "m" (dst), "m" (src), 147 "m" (sum), "m" (dst_step), "m" (src_step), 148 "m" (sum_step), "m" (old_ebx) 149 : "esi", "edi", "edx", "ecx", "eax" 150 ); 151} 152 153/* 154 * MMX optimized 155 */ 156static void MIX_AREAS_16_MMX(unsigned int size, 157 volatile signed short *dst, signed short *src, 158 volatile signed int *sum, size_t dst_step, 159 size_t src_step, size_t sum_step) 160{ 161 unsigned int old_ebx; 162 163 /* 164 * ESI - src 165 * EDI - dst 166 * EBX - sum 167 * ECX - old sample 168 * EAX - sample / temporary 169 * EDX - temporary 170 */ 171 __asm__ __volatile__ ( 172 "\n" 173 174 "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 175 /* 176 * initialization, load ESI, EDI, EBX registers 177 */ 178 "\tmovl %1, %%edi\n" 179 "\tmovl %2, %%esi\n" 180 "\tmovl %3, %%ebx\n" 181 "\tcmpl $0, %0\n" 182 "\tjnz 2f\n" 183 "\tjmp 5f\n" 184 185 "\t.p2align 4,,15\n" 186 "1:" 187 "\tadd %4, %%edi\n" 188 "\tadd %5, %%esi\n" 189 "\tadd %6, %%ebx\n" 190 191 "2:" 192 /* 193 * sample = *src; 194 * sum_sample = *sum; 195 * if (cmpxchg(*dst, 0, 1) == 0) 196 * sample -= sum_sample; 197 * xadd(*sum, sample); 198 */ 199 "\tmovw $0, %%ax\n" 200 "\tmovw $1, %%cx\n" 201 "\tmovl (%%ebx), %%edx\n" 202 "\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n" 203 "\tmovswl (%%esi), %%ecx\n" 204 "\tjnz 3f\n" 205 "\t" XSUB " %%edx, %%ecx\n" 206 "3:" 207 "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n" 208 209 /* 210 * do { 211 * sample = old_sample = *sum; 212 * saturate(v); 213 * *dst = sample; 214 * } while (v != *sum); 215 */ 216 217 "4:" 218 "\tmovl (%%ebx), %%ecx\n" 219 "\tmovd %%ecx, %%mm0\n" 220 "\tpackssdw %%mm1, %%mm0\n" 221 "\tmovd %%mm0, %%eax\n" 222 "\tmovw %%ax, (%%edi)\n" 223 "\tcmpl %%ecx, (%%ebx)\n" 224 "\tjnz 4b\n" 225 226 /* 227 * while (size-- > 0) 228 */ 229 "\tdecl %0\n" 230 "\tjnz 1b\n" 231 "\temms\n" 232 "5:" 233 "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 234 235 : /* no output regs */ 236 : "m" (size), "m" (dst), "m" (src), 237 "m" (sum), "m" (dst_step), "m" (src_step), 238 "m" (sum_step), "m" (old_ebx) 239 : "esi", "edi", "edx", "ecx", "eax" 240 ); 241} 242 243/* 244 * for plain i386, 32-bit version (24-bit resolution) 245 */ 246static void MIX_AREAS_32(unsigned int size, 247 volatile signed int *dst, signed int *src, 248 volatile signed int *sum, size_t dst_step, 249 size_t src_step, size_t sum_step) 250{ 251 unsigned int old_ebx; 252 253 /* 254 * ESI - src 255 * EDI - dst 256 * EBX - sum 257 * ECX - old sample 258 * EAX - sample / temporary 259 * EDX - temporary 260 */ 261 __asm__ __volatile__ ( 262 "\n" 263 264 "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 265 /* 266 * initialization, load ESI, EDI, EBX registers 267 */ 268 "\tmovl %1, %%edi\n" 269 "\tmovl %2, %%esi\n" 270 "\tmovl %3, %%ebx\n" 271 "\tcmpl $0, %0\n" 272 "\tjnz 1f\n" 273 "\tjmp 6f\n" 274 275 "\t.p2align 4,,15\n" 276 277 "1:" 278 279 /* 280 * sample = *src; 281 * sum_sample = *sum; 282 * if (cmpxchg(*dst, 0, 1) == 0) 283 * sample -= sum_sample; 284 * xadd(*sum, sample); 285 */ 286 "\tmovl $0, %%eax\n" 287 "\tmovl $1, %%ecx\n" 288 "\tmovl (%%ebx), %%edx\n" 289 "\t" LOCK_PREFIX "cmpxchgl %%ecx, (%%edi)\n" 290 "\tjnz 2f\n" 291 "\tmovl (%%esi), %%ecx\n" 292 /* sample >>= 8 */ 293 "\tsarl $8, %%ecx\n" 294 "\t" XSUB " %%edx, %%ecx\n" 295 "\tjmp 21f\n" 296 "2:" 297 "\tmovl (%%esi), %%ecx\n" 298 /* sample >>= 8 */ 299 "\tsarl $8, %%ecx\n" 300 "21:" 301 "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n" 302 303 /* 304 * do { 305 * sample = old_sample = *sum; 306 * saturate(v); 307 * *dst = sample; 308 * } while (v != *sum); 309 */ 310 311 "3:" 312 "\tmovl (%%ebx), %%ecx\n" 313 /* 314 * if (sample > 0x7fff00) 315 */ 316 "\tmovl $0x7fffff, %%eax\n" 317 "\tcmpl %%eax, %%ecx\n" 318 "\tjg 4f\n" 319 /* 320 * if (sample < -0x800000) 321 */ 322 "\tmovl $-0x800000, %%eax\n" 323 "\tcmpl %%eax, %%ecx\n" 324 "\tjl 4f\n" 325 "\tmovl %%ecx, %%eax\n" 326 "4:" 327 /* 328 * sample <<= 8; 329 */ 330 "\tsall $8, %%eax\n" 331 "\tmovl %%eax, (%%edi)\n" 332 "\tcmpl %%ecx, (%%ebx)\n" 333 "\tjnz 3b\n" 334 335 /* 336 * while (size-- > 0) 337 */ 338 "\tdecl %0\n" 339 "\tjz 6f\n" 340 "\tadd %4, %%edi\n" 341 "\tadd %5, %%esi\n" 342 "\tadd %6, %%ebx\n" 343 "\tjmp 1b\n" 344 345 "6:" 346 "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 347 348 : /* no output regs */ 349 : "m" (size), "m" (dst), "m" (src), 350 "m" (sum), "m" (dst_step), "m" (src_step), 351 "m" (sum_step), "m" (old_ebx) 352 : "esi", "edi", "edx", "ecx", "eax" 353 ); 354} 355 356/* 357 * 24-bit version for plain i386 358 */ 359static void MIX_AREAS_24(unsigned int size, 360 volatile unsigned char *dst, unsigned char *src, 361 volatile signed int *sum, size_t dst_step, 362 size_t src_step, size_t sum_step) 363{ 364 unsigned int old_ebx; 365 366 /* 367 * ESI - src 368 * EDI - dst 369 * EBX - sum 370 * ECX - old sample 371 * EAX - sample / temporary 372 * EDX - temporary 373 */ 374 __asm__ __volatile__ ( 375 "\n" 376 377 "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 378 /* 379 * initialization, load ESI, EDI, EBX registers 380 */ 381 "\tmovl %1, %%edi\n" 382 "\tmovl %2, %%esi\n" 383 "\tmovl %3, %%ebx\n" 384 "\tcmpl $0, %0\n" 385 "\tjnz 1f\n" 386 "\tjmp 6f\n" 387 388 "\t.p2align 4,,15\n" 389 390 "1:" 391 392 /* 393 * sample = *src; 394 * sum_sample = *sum; 395 * if (test_and_set_bit(0, dst) == 0) 396 * sample -= sum_sample; 397 * *sum += sample; 398 */ 399 "\tmovsbl 2(%%esi), %%eax\n" 400 "\tmovzwl (%%esi), %%ecx\n" 401 "\tmovl (%%ebx), %%edx\n" 402 "\tsall $16, %%eax\n" 403 "\torl %%eax, %%ecx\n" 404 "\t" LOCK_PREFIX "btsw $0, (%%edi)\n" 405 "\tjc 2f\n" 406 "\t" XSUB " %%edx, %%ecx\n" 407 "2:" 408 "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n" 409 410 /* 411 * do { 412 * sample = old_sample = *sum; 413 * saturate(sample); 414 * *dst = sample | 1; 415 * } while (old_sample != *sum); 416 */ 417 418 "3:" 419 "\tmovl (%%ebx), %%ecx\n" 420 /* 421 * if (sample > 0x7fffff) 422 */ 423 "\tmovl $0x7fffff, %%eax\n" 424 "\tcmpl %%eax, %%ecx\n" 425 "\tjg 4f\n" 426 /* 427 * if (sample < -0x7fffff) 428 */ 429 "\tmovl $-0x7fffff, %%eax\n" 430 "\tcmpl %%eax, %%ecx\n" 431 "\tjl 4f\n" 432 "\tmovl %%ecx, %%eax\n" 433 "\torl $1, %%eax\n" 434 "4:" 435 "\tmovw %%ax, (%%edi)\n" 436 "\tshrl $16, %%eax\n" 437 "\tmovb %%al, 2(%%edi)\n" 438 "\tcmpl %%ecx, (%%ebx)\n" 439 "\tjnz 3b\n" 440 441 /* 442 * while (size-- > 0) 443 */ 444 "\tdecl %0\n" 445 "\tjz 6f\n" 446 "\tadd %4, %%edi\n" 447 "\tadd %5, %%esi\n" 448 "\tadd %6, %%ebx\n" 449 "\tjmp 1b\n" 450 451 "6:" 452 "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 453 454 : /* no output regs */ 455 : "m" (size), "m" (dst), "m" (src), 456 "m" (sum), "m" (dst_step), "m" (src_step), 457 "m" (sum_step), "m" (old_ebx) 458 : "esi", "edi", "edx", "ecx", "eax" 459 ); 460} 461 462/* 463 * 24-bit version for Pentium Pro/II 464 */ 465static void MIX_AREAS_24_CMOV(unsigned int size, 466 volatile unsigned char *dst, unsigned char *src, 467 volatile signed int *sum, size_t dst_step, 468 size_t src_step, size_t sum_step) 469{ 470 unsigned int old_ebx; 471 472 /* 473 * ESI - src 474 * EDI - dst 475 * EBX - sum 476 * ECX - old sample 477 * EAX - sample / temporary 478 * EDX - temporary 479 */ 480 __asm__ __volatile__ ( 481 "\n" 482 483 "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 484 /* 485 * initialization, load ESI, EDI, EBX registers 486 */ 487 "\tmovl %1, %%edi\n" 488 "\tmovl %2, %%esi\n" 489 "\tmovl %3, %%ebx\n" 490 "\tcmpl $0, %0\n" 491 "\tjz 6f\n" 492 493 "\t.p2align 4,,15\n" 494 495 "1:" 496 497 /* 498 * sample = *src; 499 * sum_sample = *sum; 500 * if (test_and_set_bit(0, dst) == 0) 501 * sample -= sum_sample; 502 * *sum += sample; 503 */ 504 "\tmovsbl 2(%%esi), %%eax\n" 505 "\tmovzwl (%%esi), %%ecx\n" 506 "\tmovl (%%ebx), %%edx\n" 507 "\tsall $16, %%eax\n" 508 "\t" LOCK_PREFIX "btsw $0, (%%edi)\n" 509 "\tleal (%%ecx,%%eax,1), %%ecx\n" 510 "\tjc 2f\n" 511 "\t" XSUB " %%edx, %%ecx\n" 512 "2:" 513 "\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n" 514 515 /* 516 * do { 517 * sample = old_sample = *sum; 518 * saturate(sample); 519 * *dst = sample | 1; 520 * } while (old_sample != *sum); 521 */ 522 523 "3:" 524 "\tmovl (%%ebx), %%ecx\n" 525 526 "\tmovl $0x7fffff, %%eax\n" 527 "\tmovl $-0x7fffff, %%edx\n" 528 "\tcmpl %%eax, %%ecx\n" 529 "\tcmovng %%ecx, %%eax\n" 530 "\tcmpl %%edx, %%ecx\n" 531 "\tcmovl %%edx, %%eax\n" 532 533 "\torl $1, %%eax\n" 534 "\tmovw %%ax, (%%edi)\n" 535 "\tshrl $16, %%eax\n" 536 "\tmovb %%al, 2(%%edi)\n" 537 538 "\tcmpl %%ecx, (%%ebx)\n" 539 "\tjnz 3b\n" 540 541 /* 542 * while (size-- > 0) 543 */ 544 "\tadd %4, %%edi\n" 545 "\tadd %5, %%esi\n" 546 "\tadd %6, %%ebx\n" 547 "\tdecl %0\n" 548 "\tjnz 1b\n" 549 550 "6:" 551 "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 552 553 : /* no output regs */ 554 : "m" (size), "m" (dst), "m" (src), 555 "m" (sum), "m" (dst_step), "m" (src_step), 556 "m" (sum_step), "m" (old_ebx) 557 : "esi", "edi", "edx", "ecx", "eax" 558 ); 559} 560