loongson.h revision 1.1.1.8
1/* Intrinsics for ST Microelectronics Loongson-2E/2F SIMD operations.
2
3   Copyright (C) 2008-2018 Free Software Foundation, Inc.
4   Contributed by CodeSourcery.
5
6   This file is part of GCC.
7
8   GCC is free software; you can redistribute it and/or modify it
9   under the terms of the GNU General Public License as published
10   by the Free Software Foundation; either version 3, or (at your
11   option) any later version.
12
13   GCC is distributed in the hope that it will be useful, but WITHOUT
14   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
16   License for more details.
17
18   Under Section 7 of GPL version 3, you are granted additional
19   permissions described in the GCC Runtime Library Exception, version
20   3.1, as published by the Free Software Foundation.
21
22   You should have received a copy of the GNU General Public License and
23   a copy of the GCC Runtime Library Exception along with this program;
24   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
25   <http://www.gnu.org/licenses/>.  */
26
27#ifndef _GCC_LOONGSON_H
28#define _GCC_LOONGSON_H
29
30#if !defined(__mips_loongson_vector_rev)
31# error "You must select -march=loongson2e or -march=loongson2f to use loongson.h"
32#endif
33
34#ifdef __cplusplus
35extern "C" {
36#endif
37
38#include <stdint.h>
39
/* Vectors of unsigned bytes, halfwords and words.  Every type is 8
   bytes wide.  The attribute is written __vector_size__ (reserved
   spelling) so that a user macro called vector_size cannot break this
   header.  */
typedef uint8_t uint8x8_t __attribute__ ((__vector_size__ (8)));
typedef uint16_t uint16x4_t __attribute__ ((__vector_size__ (8)));
typedef uint32_t uint32x2_t __attribute__ ((__vector_size__ (8)));
44
/* Vectors of signed bytes, halfwords and words.  Every type is 8 bytes
   wide.  The attribute is written __vector_size__ (reserved spelling)
   so that a user macro called vector_size cannot break this header.  */
typedef int8_t int8x8_t __attribute__ ((__vector_size__ (8)));
typedef int16_t int16x4_t __attribute__ ((__vector_size__ (8)));
typedef int32_t int32x2_t __attribute__ ((__vector_size__ (8)));
49
50/* SIMD intrinsics.
51   Unless otherwise noted, calls to the functions below will expand into
52   precisely one machine instruction, modulo any moves required to
53   satisfy register allocation constraints.  */
54
55/* Pack with signed saturation.  */
56__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
57packsswh (int32x2_t s, int32x2_t t)
58{
59  return __builtin_loongson_packsswh (s, t);
60}
61
62__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
63packsshb (int16x4_t s, int16x4_t t)
64{
65  return __builtin_loongson_packsshb (s, t);
66}
67
68/* Pack with unsigned saturation.  */
69__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
70packushb (uint16x4_t s, uint16x4_t t)
71{
72  return __builtin_loongson_packushb (s, t);
73}
74
75/* Vector addition, treating overflow by wraparound.  */
76__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
77paddw_u (uint32x2_t s, uint32x2_t t)
78{
79  return __builtin_loongson_paddw_u (s, t);
80}
81
82__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
83paddh_u (uint16x4_t s, uint16x4_t t)
84{
85  return __builtin_loongson_paddh_u (s, t);
86}
87
88__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
89paddb_u (uint8x8_t s, uint8x8_t t)
90{
91  return __builtin_loongson_paddb_u (s, t);
92}
93
94__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
95paddw_s (int32x2_t s, int32x2_t t)
96{
97  return __builtin_loongson_paddw_s (s, t);
98}
99
100__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
101paddh_s (int16x4_t s, int16x4_t t)
102{
103  return __builtin_loongson_paddh_s (s, t);
104}
105
106__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
107paddb_s (int8x8_t s, int8x8_t t)
108{
109  return __builtin_loongson_paddb_s (s, t);
110}
111
112/* Addition of doubleword integers, treating overflow by wraparound.  */
/* Add the unsigned doublewords S and T through
   __builtin_loongson_paddd_u; overflow wraps around.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
paddd_u (uint64_t s, uint64_t t)
{
  const uint64_t __result = __builtin_loongson_paddd_u (s, t);
  return __result;
}
118
/* Add the signed doublewords S and T through
   __builtin_loongson_paddd_s; overflow wraps around.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
paddd_s (int64_t s, int64_t t)
{
  const int64_t __result = __builtin_loongson_paddd_s (s, t);
  return __result;
}
124
125/* Vector addition, treating overflow by signed saturation.  */
126__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
127paddsh (int16x4_t s, int16x4_t t)
128{
129  return __builtin_loongson_paddsh (s, t);
130}
131
132__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
133paddsb (int8x8_t s, int8x8_t t)
134{
135  return __builtin_loongson_paddsb (s, t);
136}
137
138/* Vector addition, treating overflow by unsigned saturation.  */
139__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
140paddush (uint16x4_t s, uint16x4_t t)
141{
142  return __builtin_loongson_paddush (s, t);
143}
144
145__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
146paddusb (uint8x8_t s, uint8x8_t t)
147{
148  return __builtin_loongson_paddusb (s, t);
149}
150
151/* Logical AND NOT.  */
/* Logical AND NOT of the unsigned doublewords S and T.  The operands
   reach __builtin_loongson_pandn_ud in the order given.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
pandn_ud (uint64_t s, uint64_t t)
{
  const uint64_t __result = __builtin_loongson_pandn_ud (s, t);
  return __result;
}
157
158__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
159pandn_uw (uint32x2_t s, uint32x2_t t)
160{
161  return __builtin_loongson_pandn_uw (s, t);
162}
163
164__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
165pandn_uh (uint16x4_t s, uint16x4_t t)
166{
167  return __builtin_loongson_pandn_uh (s, t);
168}
169
170__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
171pandn_ub (uint8x8_t s, uint8x8_t t)
172{
173  return __builtin_loongson_pandn_ub (s, t);
174}
175
/* Logical AND NOT of the signed doublewords S and T.  The operands
   reach __builtin_loongson_pandn_sd in the order given.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
pandn_sd (int64_t s, int64_t t)
{
  const int64_t __result = __builtin_loongson_pandn_sd (s, t);
  return __result;
}
181
182__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
183pandn_sw (int32x2_t s, int32x2_t t)
184{
185  return __builtin_loongson_pandn_sw (s, t);
186}
187
188__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
189pandn_sh (int16x4_t s, int16x4_t t)
190{
191  return __builtin_loongson_pandn_sh (s, t);
192}
193
194__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
195pandn_sb (int8x8_t s, int8x8_t t)
196{
197  return __builtin_loongson_pandn_sb (s, t);
198}
199
200/* Average.  */
201__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
202pavgh (uint16x4_t s, uint16x4_t t)
203{
204  return __builtin_loongson_pavgh (s, t);
205}
206
207__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
208pavgb (uint8x8_t s, uint8x8_t t)
209{
210  return __builtin_loongson_pavgb (s, t);
211}
212
213/* Equality test.  */
214__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
215pcmpeqw_u (uint32x2_t s, uint32x2_t t)
216{
217  return __builtin_loongson_pcmpeqw_u (s, t);
218}
219
220__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
221pcmpeqh_u (uint16x4_t s, uint16x4_t t)
222{
223  return __builtin_loongson_pcmpeqh_u (s, t);
224}
225
226__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
227pcmpeqb_u (uint8x8_t s, uint8x8_t t)
228{
229  return __builtin_loongson_pcmpeqb_u (s, t);
230}
231
232__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
233pcmpeqw_s (int32x2_t s, int32x2_t t)
234{
235  return __builtin_loongson_pcmpeqw_s (s, t);
236}
237
238__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
239pcmpeqh_s (int16x4_t s, int16x4_t t)
240{
241  return __builtin_loongson_pcmpeqh_s (s, t);
242}
243
244__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
245pcmpeqb_s (int8x8_t s, int8x8_t t)
246{
247  return __builtin_loongson_pcmpeqb_s (s, t);
248}
249
250/* Greater-than test.  */
251__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
252pcmpgtw_u (uint32x2_t s, uint32x2_t t)
253{
254  return __builtin_loongson_pcmpgtw_u (s, t);
255}
256
257__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
258pcmpgth_u (uint16x4_t s, uint16x4_t t)
259{
260  return __builtin_loongson_pcmpgth_u (s, t);
261}
262
263__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
264pcmpgtb_u (uint8x8_t s, uint8x8_t t)
265{
266  return __builtin_loongson_pcmpgtb_u (s, t);
267}
268
269__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
270pcmpgtw_s (int32x2_t s, int32x2_t t)
271{
272  return __builtin_loongson_pcmpgtw_s (s, t);
273}
274
275__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
276pcmpgth_s (int16x4_t s, int16x4_t t)
277{
278  return __builtin_loongson_pcmpgth_s (s, t);
279}
280
281__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
282pcmpgtb_s (int8x8_t s, int8x8_t t)
283{
284  return __builtin_loongson_pcmpgtb_s (s, t);
285}
286
287/* Extract halfword.  */
288__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
289pextrh_u (uint16x4_t s, int field /* 0--3 */)
290{
291  return __builtin_loongson_pextrh_u (s, field);
292}
293
294__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
295pextrh_s (int16x4_t s, int field /* 0--3 */)
296{
297  return __builtin_loongson_pextrh_s (s, field);
298}
299
300/* Insert halfword.  */
301__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
302pinsrh_0_u (uint16x4_t s, uint16x4_t t)
303{
304  return __builtin_loongson_pinsrh_0_u (s, t);
305}
306
307__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
308pinsrh_1_u (uint16x4_t s, uint16x4_t t)
309{
310  return __builtin_loongson_pinsrh_1_u (s, t);
311}
312
313__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
314pinsrh_2_u (uint16x4_t s, uint16x4_t t)
315{
316  return __builtin_loongson_pinsrh_2_u (s, t);
317}
318
319__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
320pinsrh_3_u (uint16x4_t s, uint16x4_t t)
321{
322  return __builtin_loongson_pinsrh_3_u (s, t);
323}
324
325__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
326pinsrh_0_s (int16x4_t s, int16x4_t t)
327{
328  return __builtin_loongson_pinsrh_0_s (s, t);
329}
330
331__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
332pinsrh_1_s (int16x4_t s, int16x4_t t)
333{
334  return __builtin_loongson_pinsrh_1_s (s, t);
335}
336
337__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
338pinsrh_2_s (int16x4_t s, int16x4_t t)
339{
340  return __builtin_loongson_pinsrh_2_s (s, t);
341}
342
343__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
344pinsrh_3_s (int16x4_t s, int16x4_t t)
345{
346  return __builtin_loongson_pinsrh_3_s (s, t);
347}
348
349/* Multiply and add.  */
350__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
351pmaddhw (int16x4_t s, int16x4_t t)
352{
353  return __builtin_loongson_pmaddhw (s, t);
354}
355
356/* Maximum of signed halfwords.  */
357__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
358pmaxsh (int16x4_t s, int16x4_t t)
359{
360  return __builtin_loongson_pmaxsh (s, t);
361}
362
363/* Maximum of unsigned bytes.  */
364__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
365pmaxub (uint8x8_t s, uint8x8_t t)
366{
367  return __builtin_loongson_pmaxub (s, t);
368}
369
370/* Minimum of signed halfwords.  */
371__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
372pminsh (int16x4_t s, int16x4_t t)
373{
374  return __builtin_loongson_pminsh (s, t);
375}
376
377/* Minimum of unsigned bytes.  */
378__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
379pminub (uint8x8_t s, uint8x8_t t)
380{
381  return __builtin_loongson_pminub (s, t);
382}
383
384/* Move byte mask.  */
385__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
386pmovmskb_u (uint8x8_t s)
387{
388  return __builtin_loongson_pmovmskb_u (s);
389}
390
391__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
392pmovmskb_s (int8x8_t s)
393{
394  return __builtin_loongson_pmovmskb_s (s);
395}
396
397/* Multiply unsigned integers and store high result.  */
398__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
399pmulhuh (uint16x4_t s, uint16x4_t t)
400{
401  return __builtin_loongson_pmulhuh (s, t);
402}
403
404/* Multiply signed integers and store high result.  */
405__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
406pmulhh (int16x4_t s, int16x4_t t)
407{
408  return __builtin_loongson_pmulhh (s, t);
409}
410
411/* Multiply signed integers and store low result.  */
412__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
413pmullh (int16x4_t s, int16x4_t t)
414{
415  return __builtin_loongson_pmullh (s, t);
416}
417
418/* Multiply unsigned word integers.  */
419__extension__ static __inline int64_t __attribute__ ((__always_inline__))
420pmuluw (uint32x2_t s, uint32x2_t t)
421{
422  return __builtin_loongson_pmuluw (s, t);
423}
424
425/* Absolute difference.  */
426__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
427pasubub (uint8x8_t s, uint8x8_t t)
428{
429  return __builtin_loongson_pasubub (s, t);
430}
431
432/* Sum of unsigned byte integers.  */
433__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
434biadd (uint8x8_t s)
435{
436  return __builtin_loongson_biadd (s);
437}
438
439/* Sum of absolute differences.
440   Note that this intrinsic expands into two machine instructions:
441   PASUBUB followed by BIADD.  */
442__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
443psadbh (uint8x8_t s, uint8x8_t t)
444{
445  return __builtin_loongson_psadbh (s, t);
446}
447
448/* Shuffle halfwords.  */
449__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
450pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order)
451{
452  return __builtin_loongson_pshufh_u (s, order);
453}
454
455__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
456pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order)
457{
458  return __builtin_loongson_pshufh_s (s, order);
459}
460
461/* Shift left logical.  */
462__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
463psllh_u (uint16x4_t s, uint8_t amount)
464{
465  return __builtin_loongson_psllh_u (s, amount);
466}
467
468__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
469psllh_s (int16x4_t s, uint8_t amount)
470{
471  return __builtin_loongson_psllh_s (s, amount);
472}
473
474__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
475psllw_u (uint32x2_t s, uint8_t amount)
476{
477  return __builtin_loongson_psllw_u (s, amount);
478}
479
480__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
481psllw_s (int32x2_t s, uint8_t amount)
482{
483  return __builtin_loongson_psllw_s (s, amount);
484}
485
486/* Shift right logical.  */
487__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
488psrlh_u (uint16x4_t s, uint8_t amount)
489{
490  return __builtin_loongson_psrlh_u (s, amount);
491}
492
493__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
494psrlh_s (int16x4_t s, uint8_t amount)
495{
496  return __builtin_loongson_psrlh_s (s, amount);
497}
498
499__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
500psrlw_u (uint32x2_t s, uint8_t amount)
501{
502  return __builtin_loongson_psrlw_u (s, amount);
503}
504
505__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
506psrlw_s (int32x2_t s, uint8_t amount)
507{
508  return __builtin_loongson_psrlw_s (s, amount);
509}
510
511/* Shift right arithmetic.  */
512__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
513psrah_u (uint16x4_t s, uint8_t amount)
514{
515  return __builtin_loongson_psrah_u (s, amount);
516}
517
518__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
519psrah_s (int16x4_t s, uint8_t amount)
520{
521  return __builtin_loongson_psrah_s (s, amount);
522}
523
524__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
525psraw_u (uint32x2_t s, uint8_t amount)
526{
527  return __builtin_loongson_psraw_u (s, amount);
528}
529
530__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
531psraw_s (int32x2_t s, uint8_t amount)
532{
533  return __builtin_loongson_psraw_s (s, amount);
534}
535
536/* Vector subtraction, treating overflow by wraparound.  */
537__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
538psubw_u (uint32x2_t s, uint32x2_t t)
539{
540  return __builtin_loongson_psubw_u (s, t);
541}
542
543__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
544psubh_u (uint16x4_t s, uint16x4_t t)
545{
546  return __builtin_loongson_psubh_u (s, t);
547}
548
549__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
550psubb_u (uint8x8_t s, uint8x8_t t)
551{
552  return __builtin_loongson_psubb_u (s, t);
553}
554
555__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
556psubw_s (int32x2_t s, int32x2_t t)
557{
558  return __builtin_loongson_psubw_s (s, t);
559}
560
561__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
562psubh_s (int16x4_t s, int16x4_t t)
563{
564  return __builtin_loongson_psubh_s (s, t);
565}
566
567__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
568psubb_s (int8x8_t s, int8x8_t t)
569{
570  return __builtin_loongson_psubb_s (s, t);
571}
572
573/* Subtraction of doubleword integers, treating overflow by wraparound.  */
/* Subtract using the unsigned doublewords S and T, in that operand
   order, through __builtin_loongson_psubd_u; overflow wraps around.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
psubd_u (uint64_t s, uint64_t t)
{
  const uint64_t __result = __builtin_loongson_psubd_u (s, t);
  return __result;
}
579
/* Subtract using the signed doublewords S and T, in that operand
   order, through __builtin_loongson_psubd_s; overflow wraps around.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
psubd_s (int64_t s, int64_t t)
{
  const int64_t __result = __builtin_loongson_psubd_s (s, t);
  return __result;
}
585
586/* Vector subtraction, treating overflow by signed saturation.  */
587__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
588psubsh (int16x4_t s, int16x4_t t)
589{
590  return __builtin_loongson_psubsh (s, t);
591}
592
593__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
594psubsb (int8x8_t s, int8x8_t t)
595{
596  return __builtin_loongson_psubsb (s, t);
597}
598
599/* Vector subtraction, treating overflow by unsigned saturation.  */
600__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
601psubush (uint16x4_t s, uint16x4_t t)
602{
603  return __builtin_loongson_psubush (s, t);
604}
605
606__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
607psubusb (uint8x8_t s, uint8x8_t t)
608{
609  return __builtin_loongson_psubusb (s, t);
610}
611
612/* Unpack high data.  */
613__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
614punpckhwd_u (uint32x2_t s, uint32x2_t t)
615{
616  return __builtin_loongson_punpckhwd_u (s, t);
617}
618
619__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
620punpckhhw_u (uint16x4_t s, uint16x4_t t)
621{
622  return __builtin_loongson_punpckhhw_u (s, t);
623}
624
625__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
626punpckhbh_u (uint8x8_t s, uint8x8_t t)
627{
628  return __builtin_loongson_punpckhbh_u (s, t);
629}
630
631__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
632punpckhwd_s (int32x2_t s, int32x2_t t)
633{
634  return __builtin_loongson_punpckhwd_s (s, t);
635}
636
637__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
638punpckhhw_s (int16x4_t s, int16x4_t t)
639{
640  return __builtin_loongson_punpckhhw_s (s, t);
641}
642
643__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
644punpckhbh_s (int8x8_t s, int8x8_t t)
645{
646  return __builtin_loongson_punpckhbh_s (s, t);
647}
648
649/* Unpack low data.  */
650__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
651punpcklwd_u (uint32x2_t s, uint32x2_t t)
652{
653  return __builtin_loongson_punpcklwd_u (s, t);
654}
655
656__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
657punpcklhw_u (uint16x4_t s, uint16x4_t t)
658{
659  return __builtin_loongson_punpcklhw_u (s, t);
660}
661
662__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
663punpcklbh_u (uint8x8_t s, uint8x8_t t)
664{
665  return __builtin_loongson_punpcklbh_u (s, t);
666}
667
668__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
669punpcklwd_s (int32x2_t s, int32x2_t t)
670{
671  return __builtin_loongson_punpcklwd_s (s, t);
672}
673
674__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
675punpcklhw_s (int16x4_t s, int16x4_t t)
676{
677  return __builtin_loongson_punpcklhw_s (s, t);
678}
679
680__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
681punpcklbh_s (int8x8_t s, int8x8_t t)
682{
683  return __builtin_loongson_punpcklbh_s (s, t);
684}
685
686#ifdef __cplusplus
687}
688#endif
689
690#endif
691