1/** 2 * \file pcm/pcm_dmix_x86_64.h 3 * \ingroup PCM_Plugins 4 * \brief PCM Direct Stream Mixing (dmix) Plugin Interface - X86-64 assembler code 5 * \author Takashi Iwai <tiwai@suse.de> 6 * \date 2003 7 */ 8/* 9 * PCM - Direct Stream Mixing 10 * Copyright (c) 2003 by Jaroslav Kysela <perex@perex.cz> 11 * Takashi Iwai <tiwai@suse.de> 12 * 13 * 14 * This library is free software; you can redistribute it and/or modify 15 * it under the terms of the GNU Lesser General Public License as 16 * published by the Free Software Foundation; either version 2.1 of 17 * the License, or (at your option) any later version. 18 * 19 * This program is distributed in the hope that it will be useful, 20 * but WITHOUT ANY WARRANTY; without even the implied warranty of 21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 22 * GNU Lesser General Public License for more details. 23 * 24 * You should have received a copy of the GNU Lesser General Public 25 * License along with this library; if not, write to the Free Software 26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 27 * 28 */ 29 30/* 31 * MMX optimized 32 */ 33static void MIX_AREAS_16(unsigned int size, 34 volatile signed short *dst, signed short *src, 35 volatile signed int *sum, size_t dst_step, 36 size_t src_step, size_t sum_step) 37{ 38 unsigned long long old_rbx; 39 40 /* 41 * RSI - src 42 * RDI - dst 43 * RBX - sum 44 * ECX - old sample 45 * EAX - sample / temporary 46 * EDX - temporary 47 */ 48 __asm__ __volatile__ ( 49 "\n" 50 51 "\tmovq %%rbx, %7\n" 52 /* 53 * initialization, load RSI, RDI, RBX registers 54 */ 55 "\tmovq %1, %%rdi\n" 56 "\tmovq %2, %%rsi\n" 57 "\tmovq %3, %%rbx\n" 58 59 /* 60 * while (size-- > 0) { 61 */ 62 "\tcmpl $0, %0\n" 63 "jz 6f\n" 64 65 "\t.p2align 4,,15\n" 66 67 "1:" 68 69 /* 70 * sample = *src; 71 * sum_sample = *sum; 72 * if (cmpxchg(*dst, 0, 1) == 0) 73 * sample -= sum_sample; 74 * xadd(*sum, sample); 75 */ 76 "\tmovw $0, %%ax\n" 77 "\tmovw $1, %%cx\n" 78 "\tmovl (%%rbx), %%edx\n" 79 "\t" LOCK_PREFIX "cmpxchgw %%cx, (%%rdi)\n" 80 "\tmovswl (%%rsi), %%ecx\n" 81 "\tjnz 2f\n" 82 "\t" XSUB " %%edx, %%ecx\n" 83 "2:" 84 "\t" LOCK_PREFIX XADD " %%ecx, (%%rbx)\n" 85 86 /* 87 * do { 88 * sample = old_sample = *sum; 89 * saturate(v); 90 * *dst = sample; 91 * } while (v != *sum); 92 */ 93 94 "3:" 95 "\tmovl (%%rbx), %%ecx\n" 96 "\tmovd %%ecx, %%mm0\n" 97 "\tpackssdw %%mm1, %%mm0\n" 98 "\tmovd %%mm0, %%eax\n" 99 "\tmovw %%ax, (%%rdi)\n" 100 "\tcmpl %%ecx, (%%rbx)\n" 101 "\tjnz 3b\n" 102 103 /* 104 * while (size-- > 0) 105 */ 106 "\tadd %4, %%rdi\n" 107 "\tadd %5, %%rsi\n" 108 "\tadd %6, %%rbx\n" 109 "\tdecl %0\n" 110 "\tjnz 1b\n" 111 112 "6:" 113 114 "\temms\n" 115 "\tmovq %7, %%rbx\n" 116 117 : /* no output regs */ 118 : "m" (size), "m" (dst), "m" (src), 119 "m" (sum), "m" (dst_step), "m" (src_step), 120 "m" (sum_step), "m" (old_rbx) 121 : "rsi", "rdi", "edx", "ecx", "eax" 122 ); 123} 124 125/* 126 * 32-bit version (24-bit resolution) 127 */ 128static void MIX_AREAS_32(unsigned int size, 129 volatile signed int *dst, signed int *src, 130 volatile signed int *sum, size_t dst_step, 131 size_t src_step, size_t sum_step) 132{ 133 unsigned long long old_rbx; 134 135 /* 136 * RSI - src 137 * RDI - dst 138 * RBX - sum 139 * ECX - old sample 140 * EAX - sample / temporary 141 * EDX - temporary 142 */ 143 __asm__ __volatile__ ( 144 "\n" 145 146 "\tmovq %%rbx, %7\n" 147 /* 148 * initialization, load ESI, EDI, EBX registers 149 */ 150 "\tmovq %1, %%rdi\n" 151 "\tmovq %2, %%rsi\n" 152 "\tmovq %3, %%rbx\n" 153 154 /* 155 * while (size-- > 0) { 156 */ 157 "\tcmpl $0, %0\n" 158 "jz 6f\n" 159 160 "\t.p2align 4,,15\n" 161 162 "1:" 163 164 /* 165 * sample = *src; 166 * sum_sample = *sum; 167 * if (cmpxchg(*dst, 0, 1) == 0) 168 * sample -= sum_sample; 169 * xadd(*sum, sample); 170 */ 171 "\tmovl $0, %%eax\n" 172 "\tmovl $1, %%ecx\n" 173 "\tmovl (%%rbx), %%edx\n" 174 "\t" LOCK_PREFIX "cmpxchgl %%ecx, (%%rdi)\n" 175 "\tjnz 2f\n" 176 "\tmovl (%%rsi), %%ecx\n" 177 /* sample >>= 8 */ 178 "\tsarl $8, %%ecx\n" 179 "\t" XSUB " %%edx, %%ecx\n" 180 "\tjmp 21f\n" 181 "2:" 182 "\tmovl (%%rsi), %%ecx\n" 183 /* sample >>= 8 */ 184 "\tsarl $8, %%ecx\n" 185 "21:" 186 "\t" LOCK_PREFIX XADD " %%ecx, (%%rbx)\n" 187 188 /* 189 * do { 190 * sample = old_sample = *sum; 191 * saturate(v); 192 * *dst = sample; 193 * } while (v != *sum); 194 */ 195 196 "3:" 197 "\tmovl (%%rbx), %%ecx\n" 198 /* 199 * if (sample > 0x7fff00) 200 */ 201 "\tmovl $0x7fffff, %%eax\n" 202 "\tcmpl %%eax, %%ecx\n" 203 "\tjg 4f\n" 204 /* 205 * if (sample < -0x800000) 206 */ 207 "\tmovl $-0x800000, %%eax\n" 208 "\tcmpl %%eax, %%ecx\n" 209 "\tjl 4f\n" 210 "\tmovl %%ecx, %%eax\n" 211 "4:" 212 /* 213 * sample <<= 8; 214 */ 215 "\tsall $8, %%eax\n" 216 "\tmovl %%eax, (%%rdi)\n" 217 "\tcmpl %%ecx, (%%rbx)\n" 218 "\tjnz 3b\n" 219 220 /* 221 * while (size-- > 0) 222 */ 223 "\tadd %4, %%rdi\n" 224 "\tadd %5, %%rsi\n" 225 "\tadd %6, %%rbx\n" 226 "\tdecl %0\n" 227 "\tjnz 1b\n" 228 229 "6:" 230 "\tmovq %7, %%rbx\n" 231 232 : /* no output regs */ 233 : "m" (size), "m" (dst), "m" (src), 234 "m" (sum), "m" (dst_step), "m" (src_step), 235 "m" (sum_step), "m" (old_rbx) 236 : "rsi", "rdi", "edx", "ecx", "eax" 237 ); 238} 239 240/* 241 * 24-bit version 242 */ 243static void MIX_AREAS_24(unsigned int size, 244 volatile unsigned char *dst, unsigned char *src, 245 volatile signed int *sum, size_t dst_step, 246 size_t src_step, size_t sum_step) 247{ 248 unsigned long long old_rbx; 249 250 /* 251 * RSI - src 252 * RDI - dst 253 * RBX - sum 254 * ECX - old sample 255 * EAX - sample / temporary 256 * EDX - temporary 257 */ 258 __asm__ __volatile__ ( 259 "\n" 260 261 "\tmovq %%rbx, %7\n" 262 /* 263 * initialization, load ESI, EDI, EBX registers 264 */ 265 "\tmovq %1, %%rdi\n" 266 "\tmovq %2, %%rsi\n" 267 "\tmovq %3, %%rbx\n" 268 269 /* 270 * while (size-- > 0) { 271 */ 272 "\tcmpl $0, %0\n" 273 "jz 6f\n" 274 275 "\t.p2align 4,,15\n" 276 277 "1:" 278 279 /* 280 * sample = *src; 281 * sum_sample = *sum; 282 * if (test_and_set_bit(0, dst) == 0) 283 * sample -= sum_sample; 284 * *sum += sample; 285 */ 286 "\tmovsbl 2(%%rsi), %%eax\n" 287 "\tmovzwl (%%rsi), %%ecx\n" 288 "\tmovl (%%rbx), %%edx\n" 289 "\tsall $16, %%eax\n" 290 "\torl %%eax, %%ecx\n" 291 "\t" LOCK_PREFIX "btsw $0, (%%rdi)\n" 292 "\tjc 2f\n" 293 "\t" XSUB " %%edx, %%ecx\n" 294 "2:" 295 "\t" LOCK_PREFIX XADD " %%ecx, (%%rbx)\n" 296 297 /* 298 * do { 299 * sample = old_sample = *sum; 300 * saturate(sample); 301 * *dst = sample | 1; 302 * } while (old_sample != *sum); 303 */ 304 305 "3:" 306 "\tmovl (%%rbx), %%ecx\n" 307 308 "\tmovl $0x7fffff, %%eax\n" 309 "\tmovl $-0x7fffff, %%edx\n" 310 "\tcmpl %%eax, %%ecx\n" 311 "\tcmovng %%ecx, %%eax\n" 312 "\tcmpl %%edx, %%ecx\n" 313 "\tcmovl %%edx, %%eax\n" 314 315 "\torl $1, %%eax\n" 316 "\tmovw %%ax, (%%rdi)\n" 317 "\tshrl $16, %%eax\n" 318 "\tmovb %%al, 2(%%rdi)\n" 319 320 "\tcmpl %%ecx, (%%rbx)\n" 321 "\tjnz 3b\n" 322 323 /* 324 * while (size-- > 0) 325 */ 326 "\tadd %4, %%rdi\n" 327 "\tadd %5, %%rsi\n" 328 "\tadd %6, %%rbx\n" 329 "\tdecl %0\n" 330 "\tjnz 1b\n" 331 332 "6:" 333 "\tmovq %7, %%rbx\n" 334 335 : /* no output regs */ 336 : "m" (size), "m" (dst), "m" (src), 337 "m" (sum), "m" (dst_step), "m" (src_step), 338 "m" (sum_step), "m" (old_rbx) 339 : "rsi", "rdi", "edx", "ecx", "eax" 340 ); 341} 342