mmintrin.h (117395) | mmintrin.h (122180) |
---|---|
1/* Copyright (C) 2002 Free Software Foundation, Inc. | 1/* Copyright (C) 2002, 2003 Free Software Foundation, Inc. |
2 3 This file is part of GNU CC. 4 5 GNU CC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 --- 10 unchanged lines hidden (view full) --- 20/* As a special exception, if you include this header file into source 21 files compiled by GCC, this header file does not by itself cause 22 the resulting executable to be covered by the GNU General Public 23 License. This exception does not however invalidate any other 24 reasons why the executable file might be covered by the GNU General 25 Public License. */ 26 27/* Implemented from the specification included in the Intel C++ Compiler | 2 3 This file is part of GNU CC. 4 5 GNU CC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 --- 10 unchanged lines hidden (view full) --- 20/* As a special exception, if you include this header file into source 21 files compiled by GCC, this header file does not by itself cause 22 the resulting executable to be covered by the GNU General Public 23 License. This exception does not however invalidate any other 24 reasons why the executable file might be covered by the GNU General 25 Public License. */ 26 27/* Implemented from the specification included in the Intel C++ Compiler |
28 User Guide and Reference, version 5.0. */ | 28 User Guide and Reference, version 8.0. */ |
29 30#ifndef _MMINTRIN_H_INCLUDED 31#define _MMINTRIN_H_INCLUDED 32 33#ifndef __MMX__ 34# error "MMX instruction set not enabled" 35#else 36/* The data type intended for user use. */ --- 6 unchanged lines hidden (view full) --- 43 44/* Empty the multimedia state. */ 45static __inline void 46_mm_empty (void) 47{ 48 __builtin_ia32_emms (); 49} 50 | 29 30#ifndef _MMINTRIN_H_INCLUDED 31#define _MMINTRIN_H_INCLUDED 32 33#ifndef __MMX__ 34# error "MMX instruction set not enabled" 35#else 36/* The data type intended for user use. */ --- 6 unchanged lines hidden (view full) --- 43 44/* Empty the multimedia state. */ 45static __inline void 46_mm_empty (void) 47{ 48 __builtin_ia32_emms (); 49} 50 |
51static __inline void 52_m_empty (void) 53{ 54 _mm_empty (); 55} 56 |
|
51/* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */ 52static __inline __m64 53_mm_cvtsi32_si64 (int __i) 54{ 55 long long __tmp = (unsigned int)__i; 56 return (__m64) __tmp; 57} 58 | 57/* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */ 58static __inline __m64 59_mm_cvtsi32_si64 (int __i) 60{ 61 long long __tmp = (unsigned int)__i; 62 return (__m64) __tmp; 63} 64 |
65static __inline __m64 66_m_from_int (int __i) 67{ 68 return _mm_cvtsi32_si64 (__i); 69} 70 |
|
59#ifdef __x86_64__ 60/* Convert I to a __m64 object. */ 61static __inline __m64 62_mm_cvtsi64x_si64 (long long __i) 63{ 64 return (__m64) __i; 65} 66 --- 8 unchanged lines hidden (view full) --- 75/* Convert the lower 32 bits of the __m64 object into an integer. */ 76static __inline int 77_mm_cvtsi64_si32 (__m64 __i) 78{ 79 long long __tmp = (long long)__i; 80 return __tmp; 81} 82 | 71#ifdef __x86_64__ 72/* Convert I to a __m64 object. */ 73static __inline __m64 74_mm_cvtsi64x_si64 (long long __i) 75{ 76 return (__m64) __i; 77} 78 --- 8 unchanged lines hidden (view full) --- 87/* Convert the lower 32 bits of the __m64 object into an integer. */ 88static __inline int 89_mm_cvtsi64_si32 (__m64 __i) 90{ 91 long long __tmp = (long long)__i; 92 return __tmp; 93} 94 |
95static __inline int 96_m_to_int (__m64 __i) 97{ 98 return _mm_cvtsi64_si32 (__i); 99} 100 |
|
83#ifdef __x86_64__ 84/* Convert the __m64 object to a 64-bit integer. */ 85static __inline long long 86_mm_cvtsi64_si64x (__m64 __i) 87{ 88 return (long long)__i; 89} 90#endif 91 92/* Pack the four 16-bit values from M1 into the lower four 8-bit values of 93 the result, and the four 16-bit values from M2 into the upper four 8-bit 94 values of the result, all with signed saturation. */ 95static __inline __m64 96_mm_packs_pi16 (__m64 __m1, __m64 __m2) 97{ 98 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2); 99} 100 | 101#ifdef __x86_64__ 102/* Convert the __m64 object to a 64-bit integer. */ 103static __inline long long 104_mm_cvtsi64_si64x (__m64 __i) 105{ 106 return (long long)__i; 107} 108#endif 109 110/* Pack the four 16-bit values from M1 into the lower four 8-bit values of 111 the result, and the four 16-bit values from M2 into the upper four 8-bit 112 values of the result, all with signed saturation. */ 113static __inline __m64 114_mm_packs_pi16 (__m64 __m1, __m64 __m2) 115{ 116 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2); 117} 118 |
119static __inline __m64 120_m_packsswb (__m64 __m1, __m64 __m2) 121{ 122 return _mm_packs_pi16 (__m1, __m2); 123} 124 |
|
101/* Pack the two 32-bit values from M1 into the lower two 16-bit values of 102 the result, and the two 32-bit values from M2 into the upper two 16-bit 103 values of the result, all with signed saturation. */ 104static __inline __m64 105_mm_packs_pi32 (__m64 __m1, __m64 __m2) 106{ 107 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2); 108} 109 | 125/* Pack the two 32-bit values from M1 into the lower two 16-bit values of 126 the result, and the two 32-bit values from M2 into the upper two 16-bit 127 values of the result, all with signed saturation. */ 128static __inline __m64 129_mm_packs_pi32 (__m64 __m1, __m64 __m2) 130{ 131 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2); 132} 133 |
134static __inline __m64 135_m_packssdw (__m64 __m1, __m64 __m2) 136{ 137 return _mm_packs_pi32 (__m1, __m2); 138} 139 |
|
110/* Pack the four 16-bit values from M1 into the lower four 8-bit values of 111 the result, and the four 16-bit values from M2 into the upper four 8-bit 112 values of the result, all with unsigned saturation. */ 113static __inline __m64 114_mm_packs_pu16 (__m64 __m1, __m64 __m2) 115{ 116 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2); 117} 118 | 140/* Pack the four 16-bit values from M1 into the lower four 8-bit values of 141 the result, and the four 16-bit values from M2 into the upper four 8-bit 142 values of the result, all with unsigned saturation. */ 143static __inline __m64 144_mm_packs_pu16 (__m64 __m1, __m64 __m2) 145{ 146 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2); 147} 148 |
149static __inline __m64 150_m_packuswb (__m64 __m1, __m64 __m2) 151{ 152 return _mm_packs_pu16 (__m1, __m2); 153} 154 |
|
119/* Interleave the four 8-bit values from the high half of M1 with the four 120 8-bit values from the high half of M2. */ 121static __inline __m64 122_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) 123{ 124 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2); 125} 126 | 155/* Interleave the four 8-bit values from the high half of M1 with the four 156 8-bit values from the high half of M2. */ 157static __inline __m64 158_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) 159{ 160 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2); 161} 162 |
163static __inline __m64 164_m_punpckhbw (__m64 __m1, __m64 __m2) 165{ 166 return _mm_unpackhi_pi8 (__m1, __m2); 167} 168 |
|
127/* Interleave the two 16-bit values from the high half of M1 with the two 128 16-bit values from the high half of M2. */ 129static __inline __m64 130_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) 131{ 132 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2); 133} 134 | 169/* Interleave the two 16-bit values from the high half of M1 with the two 170 16-bit values from the high half of M2. */ 171static __inline __m64 172_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) 173{ 174 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2); 175} 176 |
177static __inline __m64 178_m_punpckhwd (__m64 __m1, __m64 __m2) 179{ 180 return _mm_unpackhi_pi16 (__m1, __m2); 181} 182 |
|
135/* Interleave the 32-bit value from the high half of M1 with the 32-bit 136 value from the high half of M2. */ 137static __inline __m64 138_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2) 139{ 140 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2); 141} 142 | 183/* Interleave the 32-bit value from the high half of M1 with the 32-bit 184 value from the high half of M2. */ 185static __inline __m64 186_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2) 187{ 188 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2); 189} 190 |
191static __inline __m64 192_m_punpckhdq (__m64 __m1, __m64 __m2) 193{ 194 return _mm_unpackhi_pi32 (__m1, __m2); 195} 196 |
|
143/* Interleave the four 8-bit values from the low half of M1 with the four 144 8-bit values from the low half of M2. */ 145static __inline __m64 146_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) 147{ 148 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2); 149} 150 | 197/* Interleave the four 8-bit values from the low half of M1 with the four 198 8-bit values from the low half of M2. */ 199static __inline __m64 200_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) 201{ 202 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2); 203} 204 |
205static __inline __m64 206_m_punpcklbw (__m64 __m1, __m64 __m2) 207{ 208 return _mm_unpacklo_pi8 (__m1, __m2); 209} 210 |
|
151/* Interleave the two 16-bit values from the low half of M1 with the two 152 16-bit values from the low half of M2. */ 153static __inline __m64 154_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) 155{ 156 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2); 157} 158 | 211/* Interleave the two 16-bit values from the low half of M1 with the two 212 16-bit values from the low half of M2. */ 213static __inline __m64 214_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) 215{ 216 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2); 217} 218 |
219static __inline __m64 220_m_punpcklwd (__m64 __m1, __m64 __m2) 221{ 222 return _mm_unpacklo_pi16 (__m1, __m2); 223} 224 |
|
159/* Interleave the 32-bit value from the low half of M1 with the 32-bit 160 value from the low half of M2. */ 161static __inline __m64 162_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2) 163{ 164 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2); 165} 166 | 225/* Interleave the 32-bit value from the low half of M1 with the 32-bit 226 value from the low half of M2. */ 227static __inline __m64 228_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2) 229{ 230 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2); 231} 232 |
233static __inline __m64 234_m_punpckldq (__m64 __m1, __m64 __m2) 235{ 236 return _mm_unpacklo_pi32 (__m1, __m2); 237} 238 |
|
167/* Add the 8-bit values in M1 to the 8-bit values in M2. */ 168static __inline __m64 169_mm_add_pi8 (__m64 __m1, __m64 __m2) 170{ 171 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2); 172} 173 | 239/* Add the 8-bit values in M1 to the 8-bit values in M2. */ 240static __inline __m64 241_mm_add_pi8 (__m64 __m1, __m64 __m2) 242{ 243 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2); 244} 245 |
246static __inline __m64 247_m_paddb (__m64 __m1, __m64 __m2) 248{ 249 return _mm_add_pi8 (__m1, __m2); 250} 251 |
|
174/* Add the 16-bit values in M1 to the 16-bit values in M2. */ 175static __inline __m64 176_mm_add_pi16 (__m64 __m1, __m64 __m2) 177{ 178 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2); 179} 180 | 252/* Add the 16-bit values in M1 to the 16-bit values in M2. */ 253static __inline __m64 254_mm_add_pi16 (__m64 __m1, __m64 __m2) 255{ 256 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2); 257} 258 |
259static __inline __m64 260_m_paddw (__m64 __m1, __m64 __m2) 261{ 262 return _mm_add_pi16 (__m1, __m2); 263} 264 |
|
181/* Add the 32-bit values in M1 to the 32-bit values in M2. */ 182static __inline __m64 183_mm_add_pi32 (__m64 __m1, __m64 __m2) 184{ 185 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2); 186} 187 | 265/* Add the 32-bit values in M1 to the 32-bit values in M2. */ 266static __inline __m64 267_mm_add_pi32 (__m64 __m1, __m64 __m2) 268{ 269 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2); 270} 271 |
272static __inline __m64 273_m_paddd (__m64 __m1, __m64 __m2) 274{ 275 return _mm_add_pi32 (__m1, __m2); 276} 277 |
|
188/* Add the 64-bit values in M1 to the 64-bit values in M2. */ 189static __inline __m64 190_mm_add_si64 (__m64 __m1, __m64 __m2) 191{ 192 return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2); 193} 194 195/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed 196 saturated arithmetic. */ 197static __inline __m64 198_mm_adds_pi8 (__m64 __m1, __m64 __m2) 199{ 200 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2); 201} 202 | 278/* Add the 64-bit values in M1 to the 64-bit values in M2. */ 279static __inline __m64 280_mm_add_si64 (__m64 __m1, __m64 __m2) 281{ 282 return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2); 283} 284 285/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed 286 saturated arithmetic. */ 287static __inline __m64 288_mm_adds_pi8 (__m64 __m1, __m64 __m2) 289{ 290 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2); 291} 292 |
293static __inline __m64 294_m_paddsb (__m64 __m1, __m64 __m2) 295{ 296 return _mm_adds_pi8 (__m1, __m2); 297} 298 |
|
203/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed 204 saturated arithmetic. */ 205static __inline __m64 206_mm_adds_pi16 (__m64 __m1, __m64 __m2) 207{ 208 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2); 209} 210 | 299/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed 300 saturated arithmetic. */ 301static __inline __m64 302_mm_adds_pi16 (__m64 __m1, __m64 __m2) 303{ 304 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2); 305} 306 |
307static __inline __m64 308_m_paddsw (__m64 __m1, __m64 __m2) 309{ 310 return _mm_adds_pi16 (__m1, __m2); 311} 312 |
|
211/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned 212 saturated arithmetic. */ 213static __inline __m64 214_mm_adds_pu8 (__m64 __m1, __m64 __m2) 215{ 216 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2); 217} 218 | 313/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned 314 saturated arithmetic. */ 315static __inline __m64 316_mm_adds_pu8 (__m64 __m1, __m64 __m2) 317{ 318 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2); 319} 320 |
321static __inline __m64 322_m_paddusb (__m64 __m1, __m64 __m2) 323{ 324 return _mm_adds_pu8 (__m1, __m2); 325} 326 |
|
219/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned 220 saturated arithmetic. */ 221static __inline __m64 222_mm_adds_pu16 (__m64 __m1, __m64 __m2) 223{ 224 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2); 225} 226 | 327/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned 328 saturated arithmetic. */ 329static __inline __m64 330_mm_adds_pu16 (__m64 __m1, __m64 __m2) 331{ 332 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2); 333} 334 |
335static __inline __m64 336_m_paddusw (__m64 __m1, __m64 __m2) 337{ 338 return _mm_adds_pu16 (__m1, __m2); 339} 340 |
|
227/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */ 228static __inline __m64 229_mm_sub_pi8 (__m64 __m1, __m64 __m2) 230{ 231 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2); 232} 233 | 341/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */ 342static __inline __m64 343_mm_sub_pi8 (__m64 __m1, __m64 __m2) 344{ 345 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2); 346} 347 |
348static __inline __m64 349_m_psubb (__m64 __m1, __m64 __m2) 350{ 351 return _mm_sub_pi8 (__m1, __m2); 352} 353 |
|
234/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */ 235static __inline __m64 236_mm_sub_pi16 (__m64 __m1, __m64 __m2) 237{ 238 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2); 239} 240 | 354/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */ 355static __inline __m64 356_mm_sub_pi16 (__m64 __m1, __m64 __m2) 357{ 358 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2); 359} 360 |
361static __inline __m64 362_m_psubw (__m64 __m1, __m64 __m2) 363{ 364 return _mm_sub_pi16 (__m1, __m2); 365} 366 |
|
241/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */ 242static __inline __m64 243_mm_sub_pi32 (__m64 __m1, __m64 __m2) 244{ 245 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2); 246} 247 | 367/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */ 368static __inline __m64 369_mm_sub_pi32 (__m64 __m1, __m64 __m2) 370{ 371 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2); 372} 373 |
374static __inline __m64 375_m_psubd (__m64 __m1, __m64 __m2) 376{ 377 return _mm_sub_pi32 (__m1, __m2); 378} 379 |
|
248/* Subtract the 64-bit value in M2 from the 64-bit value in M1. */ 249static __inline __m64 250_mm_sub_si64 (__m64 __m1, __m64 __m2) 251{ 252 return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2); 253} 254 255/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed 256 saturating arithmetic. */ 257static __inline __m64 258_mm_subs_pi8 (__m64 __m1, __m64 __m2) 259{ 260 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2); 261} 262 | 380/* Subtract the 64-bit value in M2 from the 64-bit value in M1. */ 381static __inline __m64 382_mm_sub_si64 (__m64 __m1, __m64 __m2) 383{ 384 return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2); 385} 386 387/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed 388 saturating arithmetic. */ 389static __inline __m64 390_mm_subs_pi8 (__m64 __m1, __m64 __m2) 391{ 392 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2); 393} 394 |
395static __inline __m64 396_m_psubsb (__m64 __m1, __m64 __m2) 397{ 398 return _mm_subs_pi8 (__m1, __m2); 399} 400 |
|
263/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using 264 signed saturating arithmetic. */ 265static __inline __m64 266_mm_subs_pi16 (__m64 __m1, __m64 __m2) 267{ 268 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2); 269} 270 | 401/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using 402 signed saturating arithmetic. */ 403static __inline __m64 404_mm_subs_pi16 (__m64 __m1, __m64 __m2) 405{ 406 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2); 407} 408 |
409static __inline __m64 410_m_psubsw (__m64 __m1, __m64 __m2) 411{ 412 return _mm_subs_pi16 (__m1, __m2); 413} 414 |
|
271/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using 272 unsigned saturating arithmetic. */ 273static __inline __m64 274_mm_subs_pu8 (__m64 __m1, __m64 __m2) 275{ 276 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2); 277} 278 | 415/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using 416 unsigned saturating arithmetic. */ 417static __inline __m64 418_mm_subs_pu8 (__m64 __m1, __m64 __m2) 419{ 420 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2); 421} 422 |
423static __inline __m64 424_m_psubusb (__m64 __m1, __m64 __m2) 425{ 426 return _mm_subs_pu8 (__m1, __m2); 427} 428 |
|
279/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using 280 unsigned saturating arithmetic. */ 281static __inline __m64 282_mm_subs_pu16 (__m64 __m1, __m64 __m2) 283{ 284 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2); 285} 286 | 429/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using 430 unsigned saturating arithmetic. */ 431static __inline __m64 432_mm_subs_pu16 (__m64 __m1, __m64 __m2) 433{ 434 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2); 435} 436 |
437static __inline __m64 438_m_psubusw (__m64 __m1, __m64 __m2) 439{ 440 return _mm_subs_pu16 (__m1, __m2); 441} 442 |
|
287/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing 288 four 32-bit intermediate results, which are then summed by pairs to 289 produce two 32-bit results. */ 290static __inline __m64 291_mm_madd_pi16 (__m64 __m1, __m64 __m2) 292{ 293 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2); 294} 295 | 443/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing 444 four 32-bit intermediate results, which are then summed by pairs to 445 produce two 32-bit results. */ 446static __inline __m64 447_mm_madd_pi16 (__m64 __m1, __m64 __m2) 448{ 449 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2); 450} 451 |
452static __inline __m64 453_m_pmaddwd (__m64 __m1, __m64 __m2) 454{ 455 return _mm_madd_pi16 (__m1, __m2); 456} 457 |
|
296/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in 297 M2 and produce the high 16 bits of the 32-bit results. */ 298static __inline __m64 299_mm_mulhi_pi16 (__m64 __m1, __m64 __m2) 300{ 301 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2); 302} 303 | 458/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in 459 M2 and produce the high 16 bits of the 32-bit results. */ 460static __inline __m64 461_mm_mulhi_pi16 (__m64 __m1, __m64 __m2) 462{ 463 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2); 464} 465 |
466static __inline __m64 467_m_pmulhw (__m64 __m1, __m64 __m2) 468{ 469 return _mm_mulhi_pi16 (__m1, __m2); 470} 471 |
|
304/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce 305 the low 16 bits of the results. */ 306static __inline __m64 307_mm_mullo_pi16 (__m64 __m1, __m64 __m2) 308{ 309 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2); 310} 311 | 472/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce 473 the low 16 bits of the results. */ 474static __inline __m64 475_mm_mullo_pi16 (__m64 __m1, __m64 __m2) 476{ 477 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2); 478} 479 |
480static __inline __m64 481_m_pmullw (__m64 __m1, __m64 __m2) 482{ 483 return _mm_mullo_pi16 (__m1, __m2); 484} 485 |
|
312/* Shift four 16-bit values in M left by COUNT. */ 313static __inline __m64 314_mm_sll_pi16 (__m64 __m, __m64 __count) 315{ 316 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count); 317} 318 319static __inline __m64 | 486/* Shift four 16-bit values in M left by COUNT. */ 487static __inline __m64 488_mm_sll_pi16 (__m64 __m, __m64 __count) 489{ 490 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count); 491} 492 493static __inline __m64 |
494_m_psllw (__m64 __m, __m64 __count) 495{ 496 return _mm_sll_pi16 (__m, __count); 497} 498 499static __inline __m64 |
|
320_mm_slli_pi16 (__m64 __m, int __count) 321{ 322 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count); 323} 324 | 500_mm_slli_pi16 (__m64 __m, int __count) 501{ 502 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count); 503} 504 |
505static __inline __m64 506_m_psllwi (__m64 __m, int __count) 507{ 508 return _mm_slli_pi16 (__m, __count); 509} 510 |
|
325/* Shift two 32-bit values in M left by COUNT. */ 326static __inline __m64 327_mm_sll_pi32 (__m64 __m, __m64 __count) 328{ 329 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count); 330} 331 332static __inline __m64 | 511/* Shift two 32-bit values in M left by COUNT. */ 512static __inline __m64 513_mm_sll_pi32 (__m64 __m, __m64 __count) 514{ 515 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count); 516} 517 518static __inline __m64 |
519_m_pslld (__m64 __m, __m64 __count) 520{ 521 return _mm_sll_pi32 (__m, __count); 522} 523 524static __inline __m64 |
|
333_mm_slli_pi32 (__m64 __m, int __count) 334{ 335 return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count); 336} 337 | 525_mm_slli_pi32 (__m64 __m, int __count) 526{ 527 return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count); 528} 529 |
530static __inline __m64 531_m_pslldi (__m64 __m, int __count) 532{ 533 return _mm_slli_pi32 (__m, __count); 534} 535 |
|
338/* Shift the 64-bit value in M left by COUNT. */ 339static __inline __m64 340_mm_sll_si64 (__m64 __m, __m64 __count) 341{ 342 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); 343} 344 345static __inline __m64 | 536/* Shift the 64-bit value in M left by COUNT. */ 537static __inline __m64 538_mm_sll_si64 (__m64 __m, __m64 __count) 539{ 540 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); 541} 542 543static __inline __m64 |
544_m_psllq (__m64 __m, __m64 __count) 545{ 546 return _mm_sll_si64 (__m, __count); 547} 548 549static __inline __m64 |
|
346_mm_slli_si64 (__m64 __m, int __count) 347{ 348 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); 349} 350 | 550_mm_slli_si64 (__m64 __m, int __count) 551{ 552 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); 553} 554 |
555static __inline __m64 556_m_psllqi (__m64 __m, int __count) 557{ 558 return _mm_slli_si64 (__m, __count); 559} 560 |
|
351/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */ 352static __inline __m64 353_mm_sra_pi16 (__m64 __m, __m64 __count) 354{ 355 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count); 356} 357 358static __inline __m64 | 561/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */ 562static __inline __m64 563_mm_sra_pi16 (__m64 __m, __m64 __count) 564{ 565 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count); 566} 567 568static __inline __m64 |
569_m_psraw (__m64 __m, __m64 __count) 570{ 571 return _mm_sra_pi16 (__m, __count); 572} 573 574static __inline __m64 |
|
359_mm_srai_pi16 (__m64 __m, int __count) 360{ 361 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count); 362} 363 | 575_mm_srai_pi16 (__m64 __m, int __count) 576{ 577 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count); 578} 579 |
580static __inline __m64 581_m_psrawi (__m64 __m, int __count) 582{ 583 return _mm_srai_pi16 (__m, __count); 584} 585 |
|
364/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */ 365static __inline __m64 366_mm_sra_pi32 (__m64 __m, __m64 __count) 367{ 368 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count); 369} 370 371static __inline __m64 | 586/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */ 587static __inline __m64 588_mm_sra_pi32 (__m64 __m, __m64 __count) 589{ 590 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count); 591} 592 593static __inline __m64 |
594_m_psrad (__m64 __m, __m64 __count) 595{ 596 return _mm_sra_pi32 (__m, __count); 597} 598 599static __inline __m64 |
|
372_mm_srai_pi32 (__m64 __m, int __count) 373{ 374 return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count); 375} 376 | 600_mm_srai_pi32 (__m64 __m, int __count) 601{ 602 return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count); 603} 604 |
605static __inline __m64 606_m_psradi (__m64 __m, int __count) 607{ 608 return _mm_srai_pi32 (__m, __count); 609} 610 |
|
377/* Shift four 16-bit values in M right by COUNT; shift in zeros. */ 378static __inline __m64 379_mm_srl_pi16 (__m64 __m, __m64 __count) 380{ 381 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count); 382} 383 384static __inline __m64 | 611/* Shift four 16-bit values in M right by COUNT; shift in zeros. */ 612static __inline __m64 613_mm_srl_pi16 (__m64 __m, __m64 __count) 614{ 615 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count); 616} 617 618static __inline __m64 |
619_m_psrlw (__m64 __m, __m64 __count) 620{ 621 return _mm_srl_pi16 (__m, __count); 622} 623 624static __inline __m64 |
|
385_mm_srli_pi16 (__m64 __m, int __count) 386{ 387 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count); 388} 389 | 625_mm_srli_pi16 (__m64 __m, int __count) 626{ 627 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count); 628} 629 |
630static __inline __m64 631_m_psrlwi (__m64 __m, int __count) 632{ 633 return _mm_srli_pi16 (__m, __count); 634} 635 |
|
390/* Shift two 32-bit values in M right by COUNT; shift in zeros. */ 391static __inline __m64 392_mm_srl_pi32 (__m64 __m, __m64 __count) 393{ 394 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count); 395} 396 397static __inline __m64 | 636/* Shift two 32-bit values in M right by COUNT; shift in zeros. */ 637static __inline __m64 638_mm_srl_pi32 (__m64 __m, __m64 __count) 639{ 640 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count); 641} 642 643static __inline __m64 |
644_m_psrld (__m64 __m, __m64 __count) 645{ 646 return _mm_srl_pi32 (__m, __count); 647} 648 649static __inline __m64 |
|
398_mm_srli_pi32 (__m64 __m, int __count) 399{ 400 return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count); 401} 402 | 650_mm_srli_pi32 (__m64 __m, int __count) 651{ 652 return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count); 653} 654 |
655static __inline __m64 656_m_psrldi (__m64 __m, int __count) 657{ 658 return _mm_srli_pi32 (__m, __count); 659} 660 |
|
403/* Shift the 64-bit value in M right by COUNT; shift in zeros. */ 404static __inline __m64 405_mm_srl_si64 (__m64 __m, __m64 __count) 406{ 407 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); 408} 409 410static __inline __m64 | 661/* Shift the 64-bit value in M right by COUNT; shift in zeros. */ 662static __inline __m64 663_mm_srl_si64 (__m64 __m, __m64 __count) 664{ 665 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); 666} 667 668static __inline __m64 |
669_m_psrlq (__m64 __m, __m64 __count) 670{ 671 return _mm_srl_si64 (__m, __count); 672} 673 674static __inline __m64 |
|
411_mm_srli_si64 (__m64 __m, int __count) 412{ 413 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); 414} 415 | 675_mm_srli_si64 (__m64 __m, int __count) 676{ 677 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); 678} 679 |
680static __inline __m64 681_m_psrlqi (__m64 __m, int __count) 682{ 683 return _mm_srli_si64 (__m, __count); 684} 685 |
|
416/* Bit-wise AND the 64-bit values in M1 and M2. */ 417static __inline __m64 418_mm_and_si64 (__m64 __m1, __m64 __m2) 419{ 420 return (__m64) __builtin_ia32_pand ((long long)__m1, (long long)__m2); 421} 422 | 686/* Bit-wise AND the 64-bit values in M1 and M2. */ 687static __inline __m64 688_mm_and_si64 (__m64 __m1, __m64 __m2) 689{ 690 return (__m64) __builtin_ia32_pand ((long long)__m1, (long long)__m2); 691} 692 |
693static __inline __m64 694_m_pand (__m64 __m1, __m64 __m2) 695{ 696 return _mm_and_si64 (__m1, __m2); 697} 698 |
|
423/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the 424 64-bit value in M2. */ 425static __inline __m64 426_mm_andnot_si64 (__m64 __m1, __m64 __m2) 427{ 428 return (__m64) __builtin_ia32_pandn ((long long)__m1, (long long)__m2); 429} 430 | 699/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the 700 64-bit value in M2. */ 701static __inline __m64 702_mm_andnot_si64 (__m64 __m1, __m64 __m2) 703{ 704 return (__m64) __builtin_ia32_pandn ((long long)__m1, (long long)__m2); 705} 706 |
707static __inline __m64 708_m_pandn (__m64 __m1, __m64 __m2) 709{ 710 return _mm_andnot_si64 (__m1, __m2); 711} 712 |
|
431/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ 432static __inline __m64 433_mm_or_si64 (__m64 __m1, __m64 __m2) 434{ 435 return (__m64)__builtin_ia32_por ((long long)__m1, (long long)__m2); 436} 437 | 713/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ 714static __inline __m64 715_mm_or_si64 (__m64 __m1, __m64 __m2) 716{ 717 return (__m64)__builtin_ia32_por ((long long)__m1, (long long)__m2); 718} 719 |
720static __inline __m64 721_m_por (__m64 __m1, __m64 __m2) 722{ 723 return _mm_or_si64 (__m1, __m2); 724} 725 |
|
438/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ 439static __inline __m64 440_mm_xor_si64 (__m64 __m1, __m64 __m2) 441{ 442 return (__m64)__builtin_ia32_pxor ((long long)__m1, (long long)__m2); 443} 444 | 726/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ 727static __inline __m64 728_mm_xor_si64 (__m64 __m1, __m64 __m2) 729{ 730 return (__m64)__builtin_ia32_pxor ((long long)__m1, (long long)__m2); 731} 732 |
733static __inline __m64 734_m_pxor (__m64 __m1, __m64 __m2) 735{ 736 return _mm_xor_si64 (__m1, __m2); 737} 738 |
|
445/* Compare eight 8-bit values. The result of the comparison is 0xFF if the 446 test is true and zero if false. */ 447static __inline __m64 448_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) 449{ 450 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2); 451} 452 453static __inline __m64 | 739/* Compare eight 8-bit values. The result of the comparison is 0xFF if the 740 test is true and zero if false. */ 741static __inline __m64 742_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) 743{ 744 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2); 745} 746 747static __inline __m64 |
748_m_pcmpeqb (__m64 __m1, __m64 __m2) 749{ 750 return _mm_cmpeq_pi8 (__m1, __m2); 751} 752 753static __inline __m64 |
|
454_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2) 455{ 456 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2); 457} 458 | 754_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2) 755{ 756 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2); 757} 758 |
759static __inline __m64 760_m_pcmpgtb (__m64 __m1, __m64 __m2) 761{ 762 return _mm_cmpgt_pi8 (__m1, __m2); 763} 764 |
|
459/* Compare four 16-bit values. The result of the comparison is 0xFFFF if 460 the test is true and zero if false. */ 461static __inline __m64 462_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2) 463{ 464 return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2); 465} 466 467static __inline __m64 | 765/* Compare four 16-bit values. The result of the comparison is 0xFFFF if 766 the test is true and zero if false. */ 767static __inline __m64 768_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2) 769{ 770 return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2); 771} 772 773static __inline __m64 |
774_m_pcmpeqw (__m64 __m1, __m64 __m2) 775{ 776 return _mm_cmpeq_pi16 (__m1, __m2); 777} 778 779static __inline __m64 |
|
468_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2) 469{ 470 return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2); 471} 472 | 780_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2) 781{ 782 return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2); 783} 784 |
785static __inline __m64 786_m_pcmpgtw (__m64 __m1, __m64 __m2) 787{ 788 return _mm_cmpgt_pi16 (__m1, __m2); 789} 790 |
|
473/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if 474 the test is true and zero if false. */ 475static __inline __m64 476_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) 477{ 478 return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2); 479} 480 481static __inline __m64 | 791/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if 792 the test is true and zero if false. */ 793static __inline __m64 794_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) 795{ 796 return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2); 797} 798 799static __inline __m64 |
800_m_pcmpeqd (__m64 __m1, __m64 __m2) 801{ 802 return _mm_cmpeq_pi32 (__m1, __m2); 803} 804 805static __inline __m64 |
|
482_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2) 483{ 484 return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2); 485} 486 | 806_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2) 807{ 808 return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2); 809} 810 |
811static __inline __m64 812_m_pcmpgtd (__m64 __m1, __m64 __m2) 813{ 814 return _mm_cmpgt_pi32 (__m1, __m2); 815} 816 |
|
487/* Creates a 64-bit zero. */ 488static __inline __m64 489_mm_setzero_si64 (void) 490{ 491 return (__m64)__builtin_ia32_mmx_zero (); 492} 493 494/* Creates a vector of two 32-bit values; I0 is least significant. */ --- 74 unchanged lines hidden (view full) --- 569/* Creates a vector of four 16-bit values, all elements containing W. */ 570static __inline __m64 571_mm_set1_pi16 (short __w) 572{ 573 unsigned int __i = (unsigned short)__w << 16 | (unsigned short)__w; 574 return _mm_set1_pi32 (__i); 575} 576 | 817/* Creates a 64-bit zero. */ 818static __inline __m64 819_mm_setzero_si64 (void) 820{ 821 return (__m64)__builtin_ia32_mmx_zero (); 822} 823 824/* Creates a vector of two 32-bit values; I0 is least significant. */ --- 74 unchanged lines hidden (view full) --- 899/* Creates a vector of four 16-bit values, all elements containing W. */ 900static __inline __m64 901_mm_set1_pi16 (short __w) 902{ 903 unsigned int __i = (unsigned short)__w << 16 | (unsigned short)__w; 904 return _mm_set1_pi32 (__i); 905} 906 |
577/* Creates a vector of four 16-bit values, all elements containing B. */ | 907/* Creates a vector of eight 8-bit values, all elements containing B. */ |
578static __inline __m64 579_mm_set1_pi8 (char __b) 580{ 581 unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b; 582 unsigned int __i = __w << 16 | __w; 583 return _mm_set1_pi32 (__i); 584} 585 586#endif /* __MMX__ */ 587#endif /* _MMINTRIN_H_INCLUDED */ | 908static __inline __m64 909_mm_set1_pi8 (char __b) 910{ 911 unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b; 912 unsigned int __i = __w << 16 | __w; 913 return _mm_set1_pi32 (__i); 914} 915 916#endif /* __MMX__ */ 917#endif /* _MMINTRIN_H_INCLUDED */ |