Cross Reference: /freebsd-10-stable/contrib/gcc/config/i386/mmintrin.h

Deleted Added

sdiff udiff text old ( 117395 ) new ( 122180 )

full compact

mmintrin.h (117395)	mmintrin.h (122180)
1/* Copyright (C) 2002 Free Software Foundation, Inc.	1/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
2 3 This file is part of GNU CC. 4 5 GNU CC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 --- 10 unchanged lines hidden (view full) --- 20/* As a special exception, if you include this header file into source 21 files compiled by GCC, this header file does not by itself cause 22 the resulting executable to be covered by the GNU General Public 23 License. This exception does not however invalidate any other 24 reasons why the executable file might be covered by the GNU General 25 Public License. */ 26 27/* Implemented from the specification included in the Intel C++ Compiler	2 3 This file is part of GNU CC. 4 5 GNU CC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 2, or (at your option) 8 any later version. 9 --- 10 unchanged lines hidden (view full) --- 20/* As a special exception, if you include this header file into source 21 files compiled by GCC, this header file does not by itself cause 22 the resulting executable to be covered by the GNU General Public 23 License. This exception does not however invalidate any other 24 reasons why the executable file might be covered by the GNU General 25 Public License. */ 26 27/* Implemented from the specification included in the Intel C++ Compiler
28 User Guide and Reference, version 5.0. */	28 User Guide and Reference, version 8.0. */
29 30#ifndef _MMINTRIN_H_INCLUDED 31#define _MMINTRIN_H_INCLUDED 32 33#ifndef __MMX__ 34# error "MMX instruction set not enabled" 35#else 36/* The data type intended for user use. */ --- 6 unchanged lines hidden (view full) --- 43 44/* Empty the multimedia state. */ 45static __inline void 46_mm_empty (void) 47{ 48 __builtin_ia32_emms (); 49} 50	29 30#ifndef _MMINTRIN_H_INCLUDED 31#define _MMINTRIN_H_INCLUDED 32 33#ifndef __MMX__ 34# error "MMX instruction set not enabled" 35#else 36/* The data type intended for user use. */ --- 6 unchanged lines hidden (view full) --- 43 44/* Empty the multimedia state. */ 45static __inline void 46_mm_empty (void) 47{ 48 __builtin_ia32_emms (); 49} 50
	51static __inline void 52_m_empty (void) 53{ 54 _mm_empty (); 55} 56
51/* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */ 52static __inline __m64 53_mm_cvtsi32_si64 (int __i) 54{ 55 long long __tmp = (unsigned int)__i; 56 return (__m64) __tmp; 57} 58	57/* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */ 58static __inline __m64 59_mm_cvtsi32_si64 (int __i) 60{ 61 long long __tmp = (unsigned int)__i; 62 return (__m64) __tmp; 63} 64
	65static __inline __m64 66_m_from_int (int __i) 67{ 68 return _mm_cvtsi32_si64 (__i); 69} 70
59#ifdef __x86_64__ 60/* Convert I to a __m64 object. */ 61static __inline __m64 62_mm_cvtsi64x_si64 (long long __i) 63{ 64 return (__m64) __i; 65} 66 --- 8 unchanged lines hidden (view full) --- 75/* Convert the lower 32 bits of the __m64 object into an integer. */ 76static __inline int 77_mm_cvtsi64_si32 (__m64 __i) 78{ 79 long long __tmp = (long long)__i; 80 return __tmp; 81} 82	71#ifdef __x86_64__ 72/* Convert I to a __m64 object. */ 73static __inline __m64 74_mm_cvtsi64x_si64 (long long __i) 75{ 76 return (__m64) __i; 77} 78 --- 8 unchanged lines hidden (view full) --- 87/* Convert the lower 32 bits of the __m64 object into an integer. */ 88static __inline int 89_mm_cvtsi64_si32 (__m64 __i) 90{ 91 long long __tmp = (long long)__i; 92 return __tmp; 93} 94
	95static __inline int 96_m_to_int (__m64 __i) 97{ 98 return _mm_cvtsi64_si32 (__i); 99} 100
83#ifdef __x86_64__ 84/* Convert the lower 32 bits of the __m64 object into an integer. */ 85static __inline long long 86_mm_cvtsi64_si64x (__m64 __i) 87{ 88 return (long long)__i; 89} 90#endif 91 92/* Pack the four 16-bit values from M1 into the lower four 8-bit values of 93 the result, and the four 16-bit values from M2 into the upper four 8-bit 94 values of the result, all with signed saturation. */ 95static __inline __m64 96_mm_packs_pi16 (__m64 __m1, __m64 __m2) 97{ 98 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2); 99} 100	101#ifdef __x86_64__ 102/* Convert the lower 32 bits of the __m64 object into an integer. */ 103static __inline long long 104_mm_cvtsi64_si64x (__m64 __i) 105{ 106 return (long long)__i; 107} 108#endif 109 110/* Pack the four 16-bit values from M1 into the lower four 8-bit values of 111 the result, and the four 16-bit values from M2 into the upper four 8-bit 112 values of the result, all with signed saturation. */ 113static __inline __m64 114_mm_packs_pi16 (__m64 __m1, __m64 __m2) 115{ 116 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2); 117} 118
	119static __inline __m64 120_m_packsswb (__m64 __m1, __m64 __m2) 121{ 122 return _mm_packs_pi16 (__m1, __m2); 123} 124
101/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of 102 the result, and the two 32-bit values from M2 into the upper two 16-bit 103 values of the result, all with signed saturation. */ 104static __inline __m64 105_mm_packs_pi32 (__m64 __m1, __m64 __m2) 106{ 107 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2); 108} 109	125/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of 126 the result, and the two 32-bit values from M2 into the upper two 16-bit 127 values of the result, all with signed saturation. */ 128static __inline __m64 129_mm_packs_pi32 (__m64 __m1, __m64 __m2) 130{ 131 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2); 132} 133
	134static __inline __m64 135_m_packssdw (__m64 __m1, __m64 __m2) 136{ 137 return _mm_packs_pi32 (__m1, __m2); 138} 139
110/* Pack the four 16-bit values from M1 into the lower four 8-bit values of 111 the result, and the four 16-bit values from M2 into the upper four 8-bit 112 values of the result, all with unsigned saturation. */ 113static __inline __m64 114_mm_packs_pu16 (__m64 __m1, __m64 __m2) 115{ 116 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2); 117} 118	140/* Pack the four 16-bit values from M1 into the lower four 8-bit values of 141 the result, and the four 16-bit values from M2 into the upper four 8-bit 142 values of the result, all with unsigned saturation. */ 143static __inline __m64 144_mm_packs_pu16 (__m64 __m1, __m64 __m2) 145{ 146 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2); 147} 148
	149static __inline __m64 150_m_packuswb (__m64 __m1, __m64 __m2) 151{ 152 return _mm_packs_pu16 (__m1, __m2); 153} 154
119/* Interleave the four 8-bit values from the high half of M1 with the four 120 8-bit values from the high half of M2. */ 121static __inline __m64 122_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) 123{ 124 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2); 125} 126	155/* Interleave the four 8-bit values from the high half of M1 with the four 156 8-bit values from the high half of M2. */ 157static __inline __m64 158_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) 159{ 160 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2); 161} 162
	163static __inline __m64 164_m_punpckhbw (__m64 __m1, __m64 __m2) 165{ 166 return _mm_unpackhi_pi8 (__m1, __m2); 167} 168
127/* Interleave the two 16-bit values from the high half of M1 with the two 128 16-bit values from the high half of M2. */ 129static __inline __m64 130_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) 131{ 132 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2); 133} 134	169/* Interleave the two 16-bit values from the high half of M1 with the two 170 16-bit values from the high half of M2. */ 171static __inline __m64 172_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) 173{ 174 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2); 175} 176
	177static __inline __m64 178_m_punpckhwd (__m64 __m1, __m64 __m2) 179{ 180 return _mm_unpackhi_pi16 (__m1, __m2); 181} 182
135/* Interleave the 32-bit value from the high half of M1 with the 32-bit 136 value from the high half of M2. */ 137static __inline __m64 138_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2) 139{ 140 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2); 141} 142	183/* Interleave the 32-bit value from the high half of M1 with the 32-bit 184 value from the high half of M2. */ 185static __inline __m64 186_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2) 187{ 188 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2); 189} 190
	191static __inline __m64 192_m_punpckhdq (__m64 __m1, __m64 __m2) 193{ 194 return _mm_unpackhi_pi32 (__m1, __m2); 195} 196
143/* Interleave the four 8-bit values from the low half of M1 with the four 144 8-bit values from the low half of M2. */ 145static __inline __m64 146_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) 147{ 148 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2); 149} 150	197/* Interleave the four 8-bit values from the low half of M1 with the four 198 8-bit values from the low half of M2. */ 199static __inline __m64 200_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) 201{ 202 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2); 203} 204
	205static __inline __m64 206_m_punpcklbw (__m64 __m1, __m64 __m2) 207{ 208 return _mm_unpacklo_pi8 (__m1, __m2); 209} 210
151/* Interleave the two 16-bit values from the low half of M1 with the two 152 16-bit values from the low half of M2. */ 153static __inline __m64 154_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) 155{ 156 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2); 157} 158	211/* Interleave the two 16-bit values from the low half of M1 with the two 212 16-bit values from the low half of M2. */ 213static __inline __m64 214_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) 215{ 216 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2); 217} 218
	219static __inline __m64 220_m_punpcklwd (__m64 __m1, __m64 __m2) 221{ 222 return _mm_unpacklo_pi16 (__m1, __m2); 223} 224
159/* Interleave the 32-bit value from the low half of M1 with the 32-bit 160 value from the low half of M2. */ 161static __inline __m64 162_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2) 163{ 164 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2); 165} 166	225/* Interleave the 32-bit value from the low half of M1 with the 32-bit 226 value from the low half of M2. */ 227static __inline __m64 228_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2) 229{ 230 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2); 231} 232
	233static __inline __m64 234_m_punpckldq (__m64 __m1, __m64 __m2) 235{ 236 return _mm_unpacklo_pi32 (__m1, __m2); 237} 238
167/* Add the 8-bit values in M1 to the 8-bit values in M2. */ 168static __inline __m64 169_mm_add_pi8 (__m64 __m1, __m64 __m2) 170{ 171 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2); 172} 173	239/* Add the 8-bit values in M1 to the 8-bit values in M2. */ 240static __inline __m64 241_mm_add_pi8 (__m64 __m1, __m64 __m2) 242{ 243 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2); 244} 245
	246static __inline __m64 247_m_paddb (__m64 __m1, __m64 __m2) 248{ 249 return _mm_add_pi8 (__m1, __m2); 250} 251
174/* Add the 16-bit values in M1 to the 16-bit values in M2. */ 175static __inline __m64 176_mm_add_pi16 (__m64 __m1, __m64 __m2) 177{ 178 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2); 179} 180	252/* Add the 16-bit values in M1 to the 16-bit values in M2. */ 253static __inline __m64 254_mm_add_pi16 (__m64 __m1, __m64 __m2) 255{ 256 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2); 257} 258
	259static __inline __m64 260_m_paddw (__m64 __m1, __m64 __m2) 261{ 262 return _mm_add_pi16 (__m1, __m2); 263} 264
181/* Add the 32-bit values in M1 to the 32-bit values in M2. */ 182static __inline __m64 183_mm_add_pi32 (__m64 __m1, __m64 __m2) 184{ 185 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2); 186} 187	265/* Add the 32-bit values in M1 to the 32-bit values in M2. */ 266static __inline __m64 267_mm_add_pi32 (__m64 __m1, __m64 __m2) 268{ 269 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2); 270} 271
	272static __inline __m64 273_m_paddd (__m64 __m1, __m64 __m2) 274{ 275 return _mm_add_pi32 (__m1, __m2); 276} 277
188/* Add the 64-bit values in M1 to the 64-bit values in M2. */ 189static __inline __m64 190_mm_add_si64 (__m64 __m1, __m64 __m2) 191{ 192 return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2); 193} 194 195/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed 196 saturated arithmetic. */ 197static __inline __m64 198_mm_adds_pi8 (__m64 __m1, __m64 __m2) 199{ 200 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2); 201} 202	278/* Add the 64-bit values in M1 to the 64-bit values in M2. */ 279static __inline __m64 280_mm_add_si64 (__m64 __m1, __m64 __m2) 281{ 282 return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2); 283} 284 285/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed 286 saturated arithmetic. */ 287static __inline __m64 288_mm_adds_pi8 (__m64 __m1, __m64 __m2) 289{ 290 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2); 291} 292
	293static __inline __m64 294_m_paddsb (__m64 __m1, __m64 __m2) 295{ 296 return _mm_adds_pi8 (__m1, __m2); 297} 298
203/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed 204 saturated arithmetic. */ 205static __inline __m64 206_mm_adds_pi16 (__m64 __m1, __m64 __m2) 207{ 208 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2); 209} 210	299/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed 300 saturated arithmetic. */ 301static __inline __m64 302_mm_adds_pi16 (__m64 __m1, __m64 __m2) 303{ 304 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2); 305} 306
	307static __inline __m64 308_m_paddsw (__m64 __m1, __m64 __m2) 309{ 310 return _mm_adds_pi16 (__m1, __m2); 311} 312
211/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned 212 saturated arithmetic. */ 213static __inline __m64 214_mm_adds_pu8 (__m64 __m1, __m64 __m2) 215{ 216 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2); 217} 218	313/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned 314 saturated arithmetic. */ 315static __inline __m64 316_mm_adds_pu8 (__m64 __m1, __m64 __m2) 317{ 318 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2); 319} 320
	321static __inline __m64 322_m_paddusb (__m64 __m1, __m64 __m2) 323{ 324 return _mm_adds_pu8 (__m1, __m2); 325} 326
219/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned 220 saturated arithmetic. */ 221static __inline __m64 222_mm_adds_pu16 (__m64 __m1, __m64 __m2) 223{ 224 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2); 225} 226	327/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned 328 saturated arithmetic. */ 329static __inline __m64 330_mm_adds_pu16 (__m64 __m1, __m64 __m2) 331{ 332 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2); 333} 334
	335static __inline __m64 336_m_paddusw (__m64 __m1, __m64 __m2) 337{ 338 return _mm_adds_pu16 (__m1, __m2); 339} 340
227/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */ 228static __inline __m64 229_mm_sub_pi8 (__m64 __m1, __m64 __m2) 230{ 231 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2); 232} 233	341/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */ 342static __inline __m64 343_mm_sub_pi8 (__m64 __m1, __m64 __m2) 344{ 345 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2); 346} 347
	348static __inline __m64 349_m_psubb (__m64 __m1, __m64 __m2) 350{ 351 return _mm_sub_pi8 (__m1, __m2); 352} 353
234/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */ 235static __inline __m64 236_mm_sub_pi16 (__m64 __m1, __m64 __m2) 237{ 238 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2); 239} 240	354/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */ 355static __inline __m64 356_mm_sub_pi16 (__m64 __m1, __m64 __m2) 357{ 358 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2); 359} 360
	361static __inline __m64 362_m_psubw (__m64 __m1, __m64 __m2) 363{ 364 return _mm_sub_pi16 (__m1, __m2); 365} 366
241/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */ 242static __inline __m64 243_mm_sub_pi32 (__m64 __m1, __m64 __m2) 244{ 245 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2); 246} 247	367/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */ 368static __inline __m64 369_mm_sub_pi32 (__m64 __m1, __m64 __m2) 370{ 371 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2); 372} 373
	374static __inline __m64 375_m_psubd (__m64 __m1, __m64 __m2) 376{ 377 return _mm_sub_pi32 (__m1, __m2); 378} 379
248/* Add the 64-bit values in M1 to the 64-bit values in M2. */ 249static __inline __m64 250_mm_sub_si64 (__m64 __m1, __m64 __m2) 251{ 252 return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2); 253} 254 255/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed 256 saturating arithmetic. */ 257static __inline __m64 258_mm_subs_pi8 (__m64 __m1, __m64 __m2) 259{ 260 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2); 261} 262	380/* Add the 64-bit values in M1 to the 64-bit values in M2. */ 381static __inline __m64 382_mm_sub_si64 (__m64 __m1, __m64 __m2) 383{ 384 return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2); 385} 386 387/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed 388 saturating arithmetic. */ 389static __inline __m64 390_mm_subs_pi8 (__m64 __m1, __m64 __m2) 391{ 392 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2); 393} 394
	395static __inline __m64 396_m_psubsb (__m64 __m1, __m64 __m2) 397{ 398 return _mm_subs_pi8 (__m1, __m2); 399} 400
263/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using 264 signed saturating arithmetic. */ 265static __inline __m64 266_mm_subs_pi16 (__m64 __m1, __m64 __m2) 267{ 268 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2); 269} 270	401/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using 402 signed saturating arithmetic. */ 403static __inline __m64 404_mm_subs_pi16 (__m64 __m1, __m64 __m2) 405{ 406 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2); 407} 408
	409static __inline __m64 410_m_psubsw (__m64 __m1, __m64 __m2) 411{ 412 return _mm_subs_pi16 (__m1, __m2); 413} 414
271/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using 272 unsigned saturating arithmetic. */ 273static __inline __m64 274_mm_subs_pu8 (__m64 __m1, __m64 __m2) 275{ 276 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2); 277} 278	415/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using 416 unsigned saturating arithmetic. */ 417static __inline __m64 418_mm_subs_pu8 (__m64 __m1, __m64 __m2) 419{ 420 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2); 421} 422
	423static __inline __m64 424_m_psubusb (__m64 __m1, __m64 __m2) 425{ 426 return _mm_subs_pu8 (__m1, __m2); 427} 428
279/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using 280 unsigned saturating arithmetic. */ 281static __inline __m64 282_mm_subs_pu16 (__m64 __m1, __m64 __m2) 283{ 284 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2); 285} 286	429/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using 430 unsigned saturating arithmetic. */ 431static __inline __m64 432_mm_subs_pu16 (__m64 __m1, __m64 __m2) 433{ 434 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2); 435} 436
	437static __inline __m64 438_m_psubusw (__m64 __m1, __m64 __m2) 439{ 440 return _mm_subs_pu16 (__m1, __m2); 441} 442
287/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing 288 four 32-bit intermediate results, which are then summed by pairs to 289 produce two 32-bit results. */ 290static __inline __m64 291_mm_madd_pi16 (__m64 __m1, __m64 __m2) 292{ 293 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2); 294} 295	443/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing 444 four 32-bit intermediate results, which are then summed by pairs to 445 produce two 32-bit results. */ 446static __inline __m64 447_mm_madd_pi16 (__m64 __m1, __m64 __m2) 448{ 449 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2); 450} 451
	452static __inline __m64 453_m_pmaddwd (__m64 __m1, __m64 __m2) 454{ 455 return _mm_madd_pi16 (__m1, __m2); 456} 457
296/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in 297 M2 and produce the high 16 bits of the 32-bit results. */ 298static __inline __m64 299_mm_mulhi_pi16 (__m64 __m1, __m64 __m2) 300{ 301 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2); 302} 303	458/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in 459 M2 and produce the high 16 bits of the 32-bit results. */ 460static __inline __m64 461_mm_mulhi_pi16 (__m64 __m1, __m64 __m2) 462{ 463 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2); 464} 465
	466static __inline __m64 467_m_pmulhw (__m64 __m1, __m64 __m2) 468{ 469 return _mm_mulhi_pi16 (__m1, __m2); 470} 471
304/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce 305 the low 16 bits of the results. */ 306static __inline __m64 307_mm_mullo_pi16 (__m64 __m1, __m64 __m2) 308{ 309 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2); 310} 311	472/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce 473 the low 16 bits of the results. */ 474static __inline __m64 475_mm_mullo_pi16 (__m64 __m1, __m64 __m2) 476{ 477 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2); 478} 479
	480static __inline __m64 481_m_pmullw (__m64 __m1, __m64 __m2) 482{ 483 return _mm_mullo_pi16 (__m1, __m2); 484} 485
312/* Shift four 16-bit values in M left by COUNT. */ 313static __inline __m64 314_mm_sll_pi16 (__m64 __m, __m64 __count) 315{ 316 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count); 317} 318 319static __inline __m64	486/* Shift four 16-bit values in M left by COUNT. */ 487static __inline __m64 488_mm_sll_pi16 (__m64 __m, __m64 __count) 489{ 490 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count); 491} 492 493static __inline __m64
	494_m_psllw (__m64 __m, __m64 __count) 495{ 496 return _mm_sll_pi16 (__m, __count); 497} 498 499static __inline __m64
320_mm_slli_pi16 (__m64 __m, int __count) 321{ 322 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count); 323} 324	500_mm_slli_pi16 (__m64 __m, int __count) 501{ 502 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count); 503} 504
	505static __inline __m64 506_m_psllwi (__m64 __m, int __count) 507{ 508 return _mm_slli_pi16 (__m, __count); 509} 510
325/* Shift two 32-bit values in M left by COUNT. */ 326static __inline __m64 327_mm_sll_pi32 (__m64 __m, __m64 __count) 328{ 329 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count); 330} 331 332static __inline __m64	511/* Shift two 32-bit values in M left by COUNT. */ 512static __inline __m64 513_mm_sll_pi32 (__m64 __m, __m64 __count) 514{ 515 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count); 516} 517 518static __inline __m64
	519_m_pslld (__m64 __m, __m64 __count) 520{ 521 return _mm_sll_pi32 (__m, __count); 522} 523 524static __inline __m64
333_mm_slli_pi32 (__m64 __m, int __count) 334{ 335 return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count); 336} 337	525_mm_slli_pi32 (__m64 __m, int __count) 526{ 527 return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count); 528} 529
	530static __inline __m64 531_m_pslldi (__m64 __m, int __count) 532{ 533 return _mm_slli_pi32 (__m, __count); 534} 535
338/* Shift the 64-bit value in M left by COUNT. */ 339static __inline __m64 340_mm_sll_si64 (__m64 __m, __m64 __count) 341{ 342 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); 343} 344 345static __inline __m64	536/* Shift the 64-bit value in M left by COUNT. */ 537static __inline __m64 538_mm_sll_si64 (__m64 __m, __m64 __count) 539{ 540 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); 541} 542 543static __inline __m64
	544_m_psllq (__m64 __m, __m64 __count) 545{ 546 return _mm_sll_si64 (__m, __count); 547} 548 549static __inline __m64
346_mm_slli_si64 (__m64 __m, int __count) 347{ 348 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); 349} 350	550_mm_slli_si64 (__m64 __m, int __count) 551{ 552 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count); 553} 554
	555static __inline __m64 556_m_psllqi (__m64 __m, int __count) 557{ 558 return _mm_slli_si64 (__m, __count); 559} 560
351/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */ 352static __inline __m64 353_mm_sra_pi16 (__m64 __m, __m64 __count) 354{ 355 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count); 356} 357 358static __inline __m64	561/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */ 562static __inline __m64 563_mm_sra_pi16 (__m64 __m, __m64 __count) 564{ 565 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count); 566} 567 568static __inline __m64
	569_m_psraw (__m64 __m, __m64 __count) 570{ 571 return _mm_sra_pi16 (__m, __count); 572} 573 574static __inline __m64
359_mm_srai_pi16 (__m64 __m, int __count) 360{ 361 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count); 362} 363	575_mm_srai_pi16 (__m64 __m, int __count) 576{ 577 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count); 578} 579
	580static __inline __m64 581_m_psrawi (__m64 __m, int __count) 582{ 583 return _mm_srai_pi16 (__m, __count); 584} 585
364/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */ 365static __inline __m64 366_mm_sra_pi32 (__m64 __m, __m64 __count) 367{ 368 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count); 369} 370 371static __inline __m64	586/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */ 587static __inline __m64 588_mm_sra_pi32 (__m64 __m, __m64 __count) 589{ 590 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count); 591} 592 593static __inline __m64
	594_m_psrad (__m64 __m, __m64 __count) 595{ 596 return _mm_sra_pi32 (__m, __count); 597} 598 599static __inline __m64
372_mm_srai_pi32 (__m64 __m, int __count) 373{ 374 return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count); 375} 376	600_mm_srai_pi32 (__m64 __m, int __count) 601{ 602 return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count); 603} 604
	605static __inline __m64 606_m_psradi (__m64 __m, int __count) 607{ 608 return _mm_srai_pi32 (__m, __count); 609} 610
377/* Shift four 16-bit values in M right by COUNT; shift in zeros. */ 378static __inline __m64 379_mm_srl_pi16 (__m64 __m, __m64 __count) 380{ 381 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count); 382} 383 384static __inline __m64	611/* Shift four 16-bit values in M right by COUNT; shift in zeros. */ 612static __inline __m64 613_mm_srl_pi16 (__m64 __m, __m64 __count) 614{ 615 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count); 616} 617 618static __inline __m64
	619_m_psrlw (__m64 __m, __m64 __count) 620{ 621 return _mm_srl_pi16 (__m, __count); 622} 623 624static __inline __m64
385_mm_srli_pi16 (__m64 __m, int __count) 386{ 387 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count); 388} 389	625_mm_srli_pi16 (__m64 __m, int __count) 626{ 627 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count); 628} 629
	630static __inline __m64 631_m_psrlwi (__m64 __m, int __count) 632{ 633 return _mm_srli_pi16 (__m, __count); 634} 635
390/* Shift two 32-bit values in M right by COUNT; shift in zeros. */ 391static __inline __m64 392_mm_srl_pi32 (__m64 __m, __m64 __count) 393{ 394 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count); 395} 396 397static __inline __m64	636/* Shift two 32-bit values in M right by COUNT; shift in zeros. */ 637static __inline __m64 638_mm_srl_pi32 (__m64 __m, __m64 __count) 639{ 640 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count); 641} 642 643static __inline __m64
	644_m_psrld (__m64 __m, __m64 __count) 645{ 646 return _mm_srl_pi32 (__m, __count); 647} 648 649static __inline __m64
398_mm_srli_pi32 (__m64 __m, int __count) 399{ 400 return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count); 401} 402	650_mm_srli_pi32 (__m64 __m, int __count) 651{ 652 return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count); 653} 654
	655static __inline __m64 656_m_psrldi (__m64 __m, int __count) 657{ 658 return _mm_srli_pi32 (__m, __count); 659} 660
403/* Shift the 64-bit value in M left by COUNT; shift in zeros. */ 404static __inline __m64 405_mm_srl_si64 (__m64 __m, __m64 __count) 406{ 407 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); 408} 409 410static __inline __m64	661/* Shift the 64-bit value in M left by COUNT; shift in zeros. */ 662static __inline __m64 663_mm_srl_si64 (__m64 __m, __m64 __count) 664{ 665 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); 666} 667 668static __inline __m64
	669_m_psrlq (__m64 __m, __m64 __count) 670{ 671 return _mm_srl_si64 (__m, __count); 672} 673 674static __inline __m64
411_mm_srli_si64 (__m64 __m, int __count) 412{ 413 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); 414} 415	675_mm_srli_si64 (__m64 __m, int __count) 676{ 677 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count); 678} 679
	680static __inline __m64 681_m_psrlqi (__m64 __m, int __count) 682{ 683 return _mm_srli_si64 (__m, __count); 684} 685
416/* Bit-wise AND the 64-bit values in M1 and M2. */ 417static __inline __m64 418_mm_and_si64 (__m64 __m1, __m64 __m2) 419{ 420 return (__m64) __builtin_ia32_pand ((long long)__m1, (long long)__m2); 421} 422	686/* Bit-wise AND the 64-bit values in M1 and M2. */ 687static __inline __m64 688_mm_and_si64 (__m64 __m1, __m64 __m2) 689{ 690 return (__m64) __builtin_ia32_pand ((long long)__m1, (long long)__m2); 691} 692
	693static __inline __m64 694_m_pand (__m64 __m1, __m64 __m2) 695{ 696 return _mm_and_si64 (__m1, __m2); 697} 698
423/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the 424 64-bit value in M2. */ 425static __inline __m64 426_mm_andnot_si64 (__m64 __m1, __m64 __m2) 427{ 428 return (__m64) __builtin_ia32_pandn ((long long)__m1, (long long)__m2); 429} 430	699/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the 700 64-bit value in M2. */ 701static __inline __m64 702_mm_andnot_si64 (__m64 __m1, __m64 __m2) 703{ 704 return (__m64) __builtin_ia32_pandn ((long long)__m1, (long long)__m2); 705} 706
	707static __inline __m64 708_m_pandn (__m64 __m1, __m64 __m2) 709{ 710 return _mm_andnot_si64 (__m1, __m2); 711} 712
431/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ 432static __inline __m64 433_mm_or_si64 (__m64 __m1, __m64 __m2) 434{ 435 return (__m64)__builtin_ia32_por ((long long)__m1, (long long)__m2); 436} 437	713/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */ 714static __inline __m64 715_mm_or_si64 (__m64 __m1, __m64 __m2) 716{ 717 return (__m64)__builtin_ia32_por ((long long)__m1, (long long)__m2); 718} 719
	720static __inline __m64 721_m_por (__m64 __m1, __m64 __m2) 722{ 723 return _mm_or_si64 (__m1, __m2); 724} 725
438/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ 439static __inline __m64 440_mm_xor_si64 (__m64 __m1, __m64 __m2) 441{ 442 return (__m64)__builtin_ia32_pxor ((long long)__m1, (long long)__m2); 443} 444	726/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ 727static __inline __m64 728_mm_xor_si64 (__m64 __m1, __m64 __m2) 729{ 730 return (__m64)__builtin_ia32_pxor ((long long)__m1, (long long)__m2); 731} 732
	733static __inline __m64 734_m_pxor (__m64 __m1, __m64 __m2) 735{ 736 return _mm_xor_si64 (__m1, __m2); 737} 738
445/* Compare eight 8-bit values. The result of the comparison is 0xFF if the 446 test is true and zero if false. */ 447static __inline __m64 448_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) 449{ 450 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2); 451} 452 453static __inline __m64	739/* Compare eight 8-bit values. The result of the comparison is 0xFF if the 740 test is true and zero if false. */ 741static __inline __m64 742_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) 743{ 744 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2); 745} 746 747static __inline __m64
	748_m_pcmpeqb (__m64 __m1, __m64 __m2) 749{ 750 return _mm_cmpeq_pi8 (__m1, __m2); 751} 752 753static __inline __m64
454_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2) 455{ 456 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2); 457} 458	754_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2) 755{ 756 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2); 757} 758
	759static __inline __m64 760_m_pcmpgtb (__m64 __m1, __m64 __m2) 761{ 762 return _mm_cmpgt_pi8 (__m1, __m2); 763} 764
459/* Compare four 16-bit values. The result of the comparison is 0xFFFF if 460 the test is true and zero if false. */ 461static __inline __m64 462_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2) 463{ 464 return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2); 465} 466 467static __inline __m64	765/* Compare four 16-bit values. The result of the comparison is 0xFFFF if 766 the test is true and zero if false. */ 767static __inline __m64 768_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2) 769{ 770 return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2); 771} 772 773static __inline __m64
	774_m_pcmpeqw (__m64 __m1, __m64 __m2) 775{ 776 return _mm_cmpeq_pi16 (__m1, __m2); 777} 778 779static __inline __m64
468_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2) 469{ 470 return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2); 471} 472	780_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2) 781{ 782 return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2); 783} 784
	785static __inline __m64 786_m_pcmpgtw (__m64 __m1, __m64 __m2) 787{ 788 return _mm_cmpgt_pi16 (__m1, __m2); 789} 790
473/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if 474 the test is true and zero if false. */ 475static __inline __m64 476_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) 477{ 478 return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2); 479} 480 481static __inline __m64	791/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if 792 the test is true and zero if false. */ 793static __inline __m64 794_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) 795{ 796 return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2); 797} 798 799static __inline __m64
	800_m_pcmpeqd (__m64 __m1, __m64 __m2) 801{ 802 return _mm_cmpeq_pi32 (__m1, __m2); 803} 804 805static __inline __m64
482_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2) 483{ 484 return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2); 485} 486	806_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2) 807{ 808 return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2); 809} 810
	811static __inline __m64 812_m_pcmpgtd (__m64 __m1, __m64 __m2) 813{ 814 return _mm_cmpgt_pi32 (__m1, __m2); 815} 816
487/* Creates a 64-bit zero. */ 488static __inline __m64 489_mm_setzero_si64 (void) 490{ 491 return (__m64)__builtin_ia32_mmx_zero (); 492} 493 494/* Creates a vector of two 32-bit values; I0 is least significant. */ --- 74 unchanged lines hidden (view full) --- 569/* Creates a vector of four 16-bit values, all elements containing W. */ 570static __inline __m64 571_mm_set1_pi16 (short __w) 572{ 573 unsigned int __i = (unsigned short)__w << 16 | (unsigned short)__w; 574 return _mm_set1_pi32 (__i); 575} 576	817/* Creates a 64-bit zero. */ 818static __inline __m64 819_mm_setzero_si64 (void) 820{ 821 return (__m64)__builtin_ia32_mmx_zero (); 822} 823 824/* Creates a vector of two 32-bit values; I0 is least significant. */ --- 74 unchanged lines hidden (view full) --- 899/* Creates a vector of four 16-bit values, all elements containing W. */ 900static __inline __m64 901_mm_set1_pi16 (short __w) 902{ 903 unsigned int __i = (unsigned short)__w << 16 | (unsigned short)__w; 904 return _mm_set1_pi32 (__i); 905} 906
577/* Creates a vector of four 16-bit values, all elements containing B. */	907/* Creates a vector of eight 8-bit values, all elements containing B. */
578static __inline __m64 579_mm_set1_pi8 (char __b) 580{ 581 unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b; 582 unsigned int __i = __w << 16 | __w; 583 return _mm_set1_pi32 (__i); 584} 585 586#endif /* __MMX__ */ 587#endif /* _MMINTRIN_H_INCLUDED */	908static __inline __m64 909_mm_set1_pi8 (char __b) 910{ 911 unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b; 912 unsigned int __i = __w << 16 | __w; 913 return _mm_set1_pi32 (__i); 914} 915 916#endif /* __MMX__ */ 917#endif /* _MMINTRIN_H_INCLUDED */