1/*===-------- avxvnniint8intrin.h - AVXVNNIINT8 intrinsics -----------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9#ifndef __IMMINTRIN_H 10#error \ 11 "Never use <avxvnniint8intrin.h> directly; include <immintrin.h> instead." 12#endif 13 14#ifndef __AVXVNNIINT8INTRIN_H 15#define __AVXVNNIINT8INTRIN_H 16 17/* Define the default attributes for the functions in this file. */ 18#define __DEFAULT_FN_ATTRS256 \ 19 __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint8"), \ 20 __min_vector_width__(256))) 21#define __DEFAULT_FN_ATTRS128 \ 22 __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint8"), \ 23 __min_vector_width__(128))) 24 25/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with 26/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate 27/// signed 16-bit results. Sum these 4 results with the corresponding 28/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. 29/// 30/// \headerfile <x86intrin.h> 31/// 32/// \code 33/// _mm_dpbssd_epi32(__m128i __W, __m128i __A, __m128i __B); 34/// \endcode 35/// 36/// This intrinsic corresponds to the \c VPDPBSSD instruction. 37/// 38/// \param __A 39/// A 128-bit vector of [16 x char]. 40/// \param __B 41/// A 128-bit vector of [16 x char]. 42/// \returns 43/// A 128-bit vector of [4 x int]. 44/// 45/// \code{.operation} 46/// FOR j := 0 to 3 47/// tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) 48/// tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) 49/// tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) 50/// tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) 51/// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 52/// ENDFOR 53/// dst[MAX:128] := 0 54/// \endcode 55static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbssd_epi32(__m128i __W, 56 __m128i __A, 57 __m128i __B) { 58 return (__m128i)__builtin_ia32_vpdpbssd128((__v4si)__W, (__v4si)__A, 59 (__v4si)__B); 60} 61 62/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with 63/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate 64/// signed 16-bit results. Sum these 4 results with the corresponding 65/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. 66/// 67/// \headerfile <x86intrin.h> 68/// 69/// \code 70/// _mm256_dpbssd_epi32(__m256i __W, __m256i __A, __m256i __B); 71/// \endcode 72/// 73/// This intrinsic corresponds to the \c VPDPBSSD instruction. 74/// 75/// \param __A 76/// A 256-bit vector of [32 x char]. 77/// \param __B 78/// A 256-bit vector of [32 x char]. 79/// \returns 80/// A 256-bit vector of [8 x int]. 81/// 82/// \code{.operation} 83/// FOR j := 0 to 7 84/// tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) 85/// tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) 86/// tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) 87/// tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) 88/// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 89/// ENDFOR 90/// dst[MAX:256] := 0 91/// \endcode 92static __inline__ __m256i __DEFAULT_FN_ATTRS256 93_mm256_dpbssd_epi32(__m256i __W, __m256i __A, __m256i __B) { 94 return (__m256i)__builtin_ia32_vpdpbssd256((__v8si)__W, (__v8si)__A, 95 (__v8si)__B); 96} 97 98/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with 99/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate 100/// signed 16-bit results. Sum these 4 results with the corresponding 101/// 32-bit integer in \a __W with signed saturation, and store the packed 102/// 32-bit results in \a dst. 103/// 104/// \headerfile <x86intrin.h> 105/// 106/// \code 107/// _mm_dpbssds_epi32( __m128i __W, __m128i __A, __m128i __B); 108/// \endcode 109/// 110/// This intrinsic corresponds to the \c VPDPBSSD instruction. 111/// 112/// \param __A 113/// A 128-bit vector of [16 x char]. 114/// \param __B 115/// A 128-bit vector of [16 x char]. 116/// \returns 117/// A 128-bit vector of [4 x int]. 118/// 119/// \code{.operation} 120/// FOR j := 0 to 3 121/// tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) 122/// tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) 123/// tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) 124/// tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) 125/// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) 126/// ENDFOR 127/// dst[MAX:128] := 0 128/// \endcode 129static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbssds_epi32(__m128i __W, 130 __m128i __A, 131 __m128i __B) { 132 return (__m128i)__builtin_ia32_vpdpbssds128((__v4si)__W, (__v4si)__A, 133 (__v4si)__B); 134} 135 136/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with 137/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate 138/// signed 16-bit results. Sum these 4 results with the corresponding 139/// 32-bit integer in \a __W with signed saturation, and store the packed 140/// 32-bit results in \a dst. 141/// 142/// \headerfile <x86intrin.h> 143/// 144/// \code 145/// _mm256_dpbssds_epi32(__m256i __W, __m256i __A, __m256i __B); 146/// \endcode 147/// 148/// This intrinsic corresponds to the \c VPDPBSSD instruction. 149/// 150/// \param __A 151/// A 256-bit vector of [32 x char]. 152/// \param __B 153/// A 256-bit vector of [32 x char]. 154/// \returns 155/// A 256-bit vector of [8 x int]. 156/// 157/// \code{.operation} 158/// FOR j := 0 to 7 159/// tmp1.word := SignExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j]) 160/// tmp2.word := SignExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1]) 161/// tmp3.word := SignExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2]) 162/// tmp4.word := SignExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3]) 163/// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) 164/// ENDFOR 165/// dst[MAX:256] := 0 166/// \endcode 167static __inline__ __m256i __DEFAULT_FN_ATTRS256 168_mm256_dpbssds_epi32(__m256i __W, __m256i __A, __m256i __B) { 169 return (__m256i)__builtin_ia32_vpdpbssds256((__v8si)__W, (__v8si)__A, 170 (__v8si)__B); 171} 172 173/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with 174/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate 175/// signed 16-bit results. Sum these 4 results with the corresponding 176/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. 177/// 178/// \headerfile <x86intrin.h> 179/// 180/// \code 181/// _mm_dpbsud_epi32(__m128i __W, __m128i __A, __m128i __B); 182/// \endcode 183/// 184/// This intrinsic corresponds to the \c VPDPBSSD instruction. 185/// 186/// \param __A 187/// A 128-bit vector of [16 x char]. 188/// \param __B 189/// A 128-bit vector of [16 x unsigned char]. 190/// \returns 191/// A 128-bit vector of [4 x int]. 192/// 193/// \code{.operation} 194/// FOR j := 0 to 3 195/// tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) 196/// tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) 197/// tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) 198/// tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) 199/// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 200/// ENDFOR 201/// dst[MAX:128] := 0 202/// \endcode 203static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbsud_epi32(__m128i __W, 204 __m128i __A, 205 __m128i __B) { 206 return (__m128i)__builtin_ia32_vpdpbsud128((__v4si)__W, (__v4si)__A, 207 (__v4si)__B); 208} 209 210/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with 211/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate 212/// signed 16-bit results. Sum these 4 results with the corresponding 213/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. 214/// 215/// \headerfile <x86intrin.h> 216/// 217/// \code 218/// _mm256_dpbsud_epi32(__m256i __W, __m256i __A, __m256i __B); 219/// \endcode 220/// 221/// This intrinsic corresponds to the \c VPDPBSSD instruction. 222/// 223/// \param __A 224/// A 256-bit vector of [32 x char]. 225/// \param __B 226/// A 256-bit vector of [32 x unsigned char]. 227/// \returns 228/// A 256-bit vector of [8 x int]. 229/// 230/// \code{.operation} 231/// FOR j := 0 to 7 232/// tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) 233/// tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) 234/// tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) 235/// tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) 236/// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 237/// ENDFOR 238/// dst[MAX:256] := 0 239/// \endcode 240static __inline__ __m256i __DEFAULT_FN_ATTRS256 241_mm256_dpbsud_epi32(__m256i __W, __m256i __A, __m256i __B) { 242 return (__m256i)__builtin_ia32_vpdpbsud256((__v8si)__W, (__v8si)__A, 243 (__v8si)__B); 244} 245 246/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with 247/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate 248/// signed 16-bit results. Sum these 4 results with the corresponding 249/// 32-bit integer in \a __W with signed saturation, and store the packed 250/// 32-bit results in \a dst. 251/// 252/// \headerfile <x86intrin.h> 253/// 254/// \code 255/// _mm_dpbsuds_epi32( __m128i __W, __m128i __A, __m128i __B); 256/// \endcode 257/// 258/// This intrinsic corresponds to the \c VPDPBSSD instruction. 259/// 260/// \param __A 261/// A 128-bit vector of [16 x char]. 262/// \param __B 263/// A 128-bit vector of [16 x unsigned char]. 264/// \returns 265/// A 128-bit vector of [4 x int]. 266/// 267/// \code{.operation} 268/// FOR j := 0 to 3 269/// tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) 270/// tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) 271/// tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) 272/// tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) 273/// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) 274/// ENDFOR 275/// dst[MAX:128] := 0 276/// \endcode 277static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbsuds_epi32(__m128i __W, 278 __m128i __A, 279 __m128i __B) { 280 return (__m128i)__builtin_ia32_vpdpbsuds128((__v4si)__W, (__v4si)__A, 281 (__v4si)__B); 282} 283 284/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with 285/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate 286/// signed 16-bit results. Sum these 4 results with the corresponding 287/// 32-bit integer in \a __W with signed saturation, and store the packed 288/// 32-bit results in \a dst. 289/// 290/// \headerfile <x86intrin.h> 291/// 292/// \code 293/// _mm256_dpbsuds_epi32(__m256i __W, __m256i __A, __m256i __B); 294/// \endcode 295/// 296/// This intrinsic corresponds to the \c VPDPBSSD instruction. 297/// 298/// \param __A 299/// A 256-bit vector of [32 x char]. 300/// \param __B 301/// A 256-bit vector of [32 x unsigned char]. 302/// \returns 303/// A 256-bit vector of [8 x int]. 304/// 305/// \code{.operation} 306/// FOR j := 0 to 7 307/// tmp1.word := Signed(SignExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j])) 308/// tmp2.word := Signed(SignExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1])) 309/// tmp3.word := Signed(SignExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2])) 310/// tmp4.word := Signed(SignExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3])) 311/// dst.dword[j] := SIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) 312/// ENDFOR 313/// dst[MAX:256] := 0 314/// \endcode 315static __inline__ __m256i __DEFAULT_FN_ATTRS256 316_mm256_dpbsuds_epi32(__m256i __W, __m256i __A, __m256i __B) { 317 return (__m256i)__builtin_ia32_vpdpbsuds256((__v8si)__W, (__v8si)__A, 318 (__v8si)__B); 319} 320 321/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with 322/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate 323/// signed 16-bit results. Sum these 4 results with the corresponding 324/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. 325/// 326/// \headerfile <x86intrin.h> 327/// 328/// \code 329/// _mm_dpbuud_epi32(__m128i __W, __m128i __A, __m128i __B); 330/// \endcode 331/// 332/// This intrinsic corresponds to the \c VPDPBSSD instruction. 333/// 334/// \param __A 335/// A 128-bit vector of [16 x unsigned char]. 336/// \param __B 337/// A 128-bit vector of [16 x unsigned char]. 338/// \returns 339/// A 128-bit vector of [4 x int]. 340/// 341/// \code{.operation} 342/// FOR j := 0 to 3 343/// tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) 344/// tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) 345/// tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) 346/// tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) 347/// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 348/// ENDFOR 349/// dst[MAX:128] := 0 350/// \endcode 351static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbuud_epi32(__m128i __W, 352 __m128i __A, 353 __m128i __B) { 354 return (__m128i)__builtin_ia32_vpdpbuud128((__v4si)__W, (__v4si)__A, 355 (__v4si)__B); 356} 357 358/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with 359/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate 360/// signed 16-bit results. Sum these 4 results with the corresponding 361/// 32-bit integer in \a __W, and store the packed 32-bit results in \a dst. 362/// 363/// \headerfile <x86intrin.h> 364/// 365/// \code 366/// _mm256_dpbuud_epi32(__m256i __W, __m256i __A, __m256i __B); 367/// \endcode 368/// 369/// This intrinsic corresponds to the \c VPDPBSSD instruction. 370/// 371/// \param __A 372/// A 256-bit vector of [32 x unsigned char]. 373/// \param __B 374/// A 256-bit vector of [32 x unsigned char]. 375/// \returns 376/// A 256-bit vector of [8 x int]. 377/// 378/// \code{.operation} 379/// FOR j := 0 to 7 380/// tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) 381/// tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) 382/// tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) 383/// tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) 384/// dst.dword[j] := __W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 385/// ENDFOR 386/// dst[MAX:256] := 0 387/// \endcode 388static __inline__ __m256i __DEFAULT_FN_ATTRS256 389_mm256_dpbuud_epi32(__m256i __W, __m256i __A, __m256i __B) { 390 return (__m256i)__builtin_ia32_vpdpbuud256((__v8si)__W, (__v8si)__A, 391 (__v8si)__B); 392} 393 394/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with 395/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate 396/// signed 16-bit results. Sum these 4 results with the corresponding 397/// 32-bit integer in \a __W with signed saturation, and store the packed 398/// 32-bit results in \a dst. 399/// 400/// \headerfile <x86intrin.h> 401/// 402/// \code 403/// _mm_dpbuuds_epi32( __m128i __W, __m128i __A, __m128i __B); 404/// \endcode 405/// 406/// This intrinsic corresponds to the \c VPDPBUUDS instruction. 407/// 408/// \param __A 409/// A 128-bit vector of [16 x unsigned char]. 410/// \param __B 411/// A 128-bit vector of [16 x unsigned char]. 412/// \returns 413/// A 128-bit vector of [4 x int]. 414/// 415/// \code{.operation} 416/// FOR j := 0 to 3 417/// tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) 418/// tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) 419/// tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) 420/// tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) 421/// dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) 422/// ENDFOR 423/// dst[MAX:128] := 0 424/// \endcode 425static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbuuds_epi32(__m128i __W, 426 __m128i __A, 427 __m128i __B) { 428 return (__m128i)__builtin_ia32_vpdpbuuds128((__v4si)__W, (__v4si)__A, 429 (__v4si)__B); 430} 431 432/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with 433/// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate 434/// signed 16-bit results. Sum these 4 results with the corresponding 435/// 32-bit integer in \a __W with signed saturation, and store the packed 436/// 32-bit results in \a dst. 437/// 438/// \headerfile <x86intrin.h> 439/// 440/// \code 441/// _mm256_dpbuuds_epi32(__m256i __W, __m256i __A, __m256i __B); 442/// \endcode 443/// 444/// This intrinsic corresponds to the \c VPDPBUUDS instruction. 445/// 446/// \param __A 447/// A 256-bit vector of [32 x unsigned char]. 448/// \param __B 449/// A 256-bit vector of [32 x unsigned char]. 450/// \returns 451/// A 256-bit vector of [8 x int]. 452/// 453/// \code{.operation} 454/// FOR j := 0 to 7 455/// tmp1.word := ZeroExtend16(__A.byte[4*j]) * ZeroExtend16(__B.byte[4*j]) 456/// tmp2.word := ZeroExtend16(__A.byte[4*j+1]) * ZeroExtend16(__B.byte[4*j+1]) 457/// tmp3.word := ZeroExtend16(__A.byte[4*j+2]) * ZeroExtend16(__B.byte[4*j+2]) 458/// tmp4.word := ZeroExtend16(__A.byte[4*j+3]) * ZeroExtend16(__B.byte[4*j+3]) 459/// dst.dword[j] := UNSIGNED_DWORD_SATURATE(__W.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) 460/// ENDFOR 461/// dst[MAX:256] := 0 462/// \endcode 463static __inline__ __m256i __DEFAULT_FN_ATTRS256 464_mm256_dpbuuds_epi32(__m256i __W, __m256i __A, __m256i __B) { 465 return (__m256i)__builtin_ia32_vpdpbuuds256((__v8si)__W, (__v8si)__A, 466 (__v8si)__B); 467} 468#undef __DEFAULT_FN_ATTRS128 469#undef __DEFAULT_FN_ATTRS256 470 471#endif // __AVXVNNIINT8INTRIN_H 472