/*===---- avx512fintrin.h - AVX-512F intrinsics ----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
|
24 |
#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." |
25 |
#endif
|
26 |
|
27 |
#ifndef __AVX512FINTRIN_H
|
28 |
#define __AVX512FINTRIN_H
|
29 |
|
30 |
typedef double __v8df __attribute__((__vector_size__(64))); |
31 |
typedef float __v16sf __attribute__((__vector_size__(64))); |
32 |
typedef long long __v8di __attribute__((__vector_size__(64))); |
33 |
typedef int __v16si __attribute__((__vector_size__(64))); |
34 |
|
35 |
typedef float __m512 __attribute__((__vector_size__(64))); |
36 |
typedef double __m512d __attribute__((__vector_size__(64))); |
37 |
typedef long long __m512i __attribute__((__vector_size__(64))); |
38 |
|
39 |
typedef unsigned char __mmask8; |
40 |
typedef unsigned short __mmask16; |
41 |
|
42 |
/* Rounding mode macros. */
|
43 |
#define _MM_FROUND_TO_NEAREST_INT 0x00 |
44 |
#define _MM_FROUND_TO_NEG_INF 0x01 |
45 |
#define _MM_FROUND_TO_POS_INF 0x02 |
46 |
#define _MM_FROUND_TO_ZERO 0x03 |
47 |
#define _MM_FROUND_CUR_DIRECTION 0x04 |
48 |
|
49 |
/* Create vectors with repeated elements */
|
50 |
|
51 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
52 |
_mm512_setzero_si512(void)
|
53 |
{ |
54 |
return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; |
55 |
} |
56 |
|
57 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
58 |
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
|
59 |
{ |
60 |
return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
|
61 |
(__v16si) |
62 |
_mm512_setzero_si512 (), |
63 |
__M); |
64 |
} |
65 |
|
66 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
67 |
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) |
68 |
{ |
69 |
#ifdef __x86_64__
|
70 |
return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
|
71 |
(__v8di) |
72 |
_mm512_setzero_si512 (), |
73 |
__M); |
74 |
#else
|
75 |
return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
|
76 |
(__v8di) |
77 |
_mm512_setzero_si512 (), |
78 |
__M); |
79 |
#endif
|
80 |
} |
81 |
|
82 |
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
83 |
_mm512_setzero_ps(void)
|
84 |
{ |
85 |
return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, |
86 |
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; |
87 |
} |
88 |
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
89 |
_mm512_setzero_pd(void)
|
90 |
{ |
91 |
return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; |
92 |
} |
93 |
|
94 |
static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
95 |
_mm512_set1_ps(float __w)
|
96 |
{ |
97 |
return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
|
98 |
__w, __w, __w, __w, __w, __w, __w, __w }; |
99 |
} |
100 |
|
101 |
static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
102 |
_mm512_set1_pd(double __w)
|
103 |
{ |
104 |
return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
|
105 |
} |
106 |
|
107 |
static __inline __m512i __attribute__((__always_inline__, __nodebug__))
|
108 |
_mm512_set1_epi32(int __s)
|
109 |
{ |
110 |
return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
|
111 |
__s, __s, __s, __s, __s, __s, __s, __s }; |
112 |
} |
113 |
|
114 |
static __inline __m512i __attribute__((__always_inline__, __nodebug__))
|
115 |
_mm512_set1_epi64(long long __d) |
116 |
{ |
117 |
return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
|
118 |
} |
119 |
|
120 |
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
121 |
_mm512_broadcastss_ps(__m128 __X) |
122 |
{ |
123 |
float __f = __X[0]; |
124 |
return (__v16sf){ __f, __f, __f, __f,
|
125 |
__f, __f, __f, __f, |
126 |
__f, __f, __f, __f, |
127 |
__f, __f, __f, __f }; |
128 |
} |
129 |
|
130 |
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
131 |
_mm512_broadcastsd_pd(__m128d __X) |
132 |
{ |
133 |
double __d = __X[0]; |
134 |
return (__v8df){ __d, __d, __d, __d,
|
135 |
__d, __d, __d, __d }; |
136 |
} |
137 |
|
138 |
/* Cast between vector types */
|
139 |
|
140 |
static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
141 |
_mm512_castpd256_pd512(__m256d __a) |
142 |
{ |
143 |
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); |
144 |
} |
145 |
|
146 |
static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
147 |
_mm512_castps256_ps512(__m256 __a) |
148 |
{ |
149 |
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, |
150 |
-1, -1, -1, -1, -1, -1, -1, -1); |
151 |
} |
152 |
|
153 |
static __inline __m128d __attribute__((__always_inline__, __nodebug__))
|
154 |
_mm512_castpd512_pd128(__m512d __a) |
155 |
{ |
156 |
return __builtin_shufflevector(__a, __a, 0, 1); |
157 |
} |
158 |
|
159 |
static __inline __m128 __attribute__((__always_inline__, __nodebug__))
|
160 |
_mm512_castps512_ps128(__m512 __a) |
161 |
{ |
162 |
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); |
163 |
} |
164 |
|
165 |
/* Arithmetic */
|
166 |
|
167 |
static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
168 |
_mm512_add_pd(__m512d __a, __m512d __b) |
169 |
{ |
170 |
return __a + __b;
|
171 |
} |
172 |
|
173 |
static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
174 |
_mm512_add_ps(__m512 __a, __m512 __b) |
175 |
{ |
176 |
return __a + __b;
|
177 |
} |
178 |
|
179 |
static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
180 |
_mm512_mul_pd(__m512d __a, __m512d __b) |
181 |
{ |
182 |
return __a * __b;
|
183 |
} |
184 |
|
185 |
static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
186 |
_mm512_mul_ps(__m512 __a, __m512 __b) |
187 |
{ |
188 |
return __a * __b;
|
189 |
} |
190 |
|
191 |
static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
192 |
_mm512_sub_pd(__m512d __a, __m512d __b) |
193 |
{ |
194 |
return __a - __b;
|
195 |
} |
196 |
|
197 |
static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
198 |
_mm512_sub_ps(__m512 __a, __m512 __b) |
199 |
{ |
200 |
return __a - __b;
|
201 |
} |
202 |
|
203 |
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
204 |
_mm512_max_pd(__m512d __A, __m512d __B) |
205 |
{ |
206 |
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
|
207 |
(__v8df) __B, |
208 |
(__v8df) |
209 |
_mm512_setzero_pd (), |
210 |
(__mmask8) -1,
|
211 |
_MM_FROUND_CUR_DIRECTION); |
212 |
} |
213 |
|
214 |
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
215 |
_mm512_max_ps(__m512 __A, __m512 __B) |
216 |
{ |
217 |
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
|
218 |
(__v16sf) __B, |
219 |
(__v16sf) |
220 |
_mm512_setzero_ps (), |
221 |
(__mmask16) -1,
|
222 |
_MM_FROUND_CUR_DIRECTION); |
223 |
} |
224 |
|
225 |
static __inline __m512i
|
226 |
__attribute__ ((__always_inline__, __nodebug__)) |
227 |
_mm512_max_epi32(__m512i __A, __m512i __B) |
228 |
{ |
229 |
return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
|
230 |
(__v16si) __B, |
231 |
(__v16si) |
232 |
_mm512_setzero_si512 (), |
233 |
(__mmask16) -1);
|
234 |
} |
235 |
|
236 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
237 |
_mm512_max_epu32(__m512i __A, __m512i __B) |
238 |
{ |
239 |
return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
|
240 |
(__v16si) __B, |
241 |
(__v16si) |
242 |
_mm512_setzero_si512 (), |
243 |
(__mmask16) -1);
|
244 |
} |
245 |
|
246 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
247 |
_mm512_max_epi64(__m512i __A, __m512i __B) |
248 |
{ |
249 |
return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
|
250 |
(__v8di) __B, |
251 |
(__v8di) |
252 |
_mm512_setzero_si512 (), |
253 |
(__mmask8) -1);
|
254 |
} |
255 |
|
256 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
257 |
_mm512_max_epu64(__m512i __A, __m512i __B) |
258 |
{ |
259 |
return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
|
260 |
(__v8di) __B, |
261 |
(__v8di) |
262 |
_mm512_setzero_si512 (), |
263 |
(__mmask8) -1);
|
264 |
} |
265 |
|
266 |
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
267 |
_mm512_min_pd(__m512d __A, __m512d __B) |
268 |
{ |
269 |
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
|
270 |
(__v8df) __B, |
271 |
(__v8df) |
272 |
_mm512_setzero_pd (), |
273 |
(__mmask8) -1,
|
274 |
_MM_FROUND_CUR_DIRECTION); |
275 |
} |
276 |
|
277 |
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
278 |
_mm512_min_ps(__m512 __A, __m512 __B) |
279 |
{ |
280 |
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
|
281 |
(__v16sf) __B, |
282 |
(__v16sf) |
283 |
_mm512_setzero_ps (), |
284 |
(__mmask16) -1,
|
285 |
_MM_FROUND_CUR_DIRECTION); |
286 |
} |
287 |
|
288 |
static __inline __m512i
|
289 |
__attribute__ ((__always_inline__, __nodebug__)) |
290 |
_mm512_min_epi32(__m512i __A, __m512i __B) |
291 |
{ |
292 |
return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
|
293 |
(__v16si) __B, |
294 |
(__v16si) |
295 |
_mm512_setzero_si512 (), |
296 |
(__mmask16) -1);
|
297 |
} |
298 |
|
299 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
300 |
_mm512_min_epu32(__m512i __A, __m512i __B) |
301 |
{ |
302 |
return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
|
303 |
(__v16si) __B, |
304 |
(__v16si) |
305 |
_mm512_setzero_si512 (), |
306 |
(__mmask16) -1);
|
307 |
} |
308 |
|
309 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
310 |
_mm512_min_epi64(__m512i __A, __m512i __B) |
311 |
{ |
312 |
return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
|
313 |
(__v8di) __B, |
314 |
(__v8di) |
315 |
_mm512_setzero_si512 (), |
316 |
(__mmask8) -1);
|
317 |
} |
318 |
|
319 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
320 |
_mm512_min_epu64(__m512i __A, __m512i __B) |
321 |
{ |
322 |
return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
|
323 |
(__v8di) __B, |
324 |
(__v8di) |
325 |
_mm512_setzero_si512 (), |
326 |
(__mmask8) -1);
|
327 |
} |
328 |
|
329 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
330 |
_mm512_mul_epi32(__m512i __X, __m512i __Y) |
331 |
{ |
332 |
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
|
333 |
(__v16si) __Y, |
334 |
(__v8di) |
335 |
_mm512_setzero_si512 (), |
336 |
(__mmask8) -1);
|
337 |
} |
338 |
|
339 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
340 |
_mm512_mul_epu32(__m512i __X, __m512i __Y) |
341 |
{ |
342 |
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
|
343 |
(__v16si) __Y, |
344 |
(__v8di) |
345 |
_mm512_setzero_si512 (), |
346 |
(__mmask8) -1);
|
347 |
} |
348 |
|
349 |
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
350 |
_mm512_sqrt_pd(__m512d a) |
351 |
{ |
352 |
return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
|
353 |
(__v8df) _mm512_setzero_pd (), |
354 |
(__mmask8) -1,
|
355 |
_MM_FROUND_CUR_DIRECTION); |
356 |
} |
357 |
|
358 |
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
359 |
_mm512_sqrt_ps(__m512 a) |
360 |
{ |
361 |
return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
|
362 |
(__v16sf) _mm512_setzero_ps (), |
363 |
(__mmask16) -1,
|
364 |
_MM_FROUND_CUR_DIRECTION); |
365 |
} |
366 |
|
367 |
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
368 |
_mm512_rsqrt14_pd(__m512d __A) |
369 |
{ |
370 |
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
|
371 |
(__v8df) |
372 |
_mm512_setzero_pd (), |
373 |
(__mmask8) -1);}
|
374 |
|
375 |
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
376 |
_mm512_rsqrt14_ps(__m512 __A) |
377 |
{ |
378 |
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
|
379 |
(__v16sf) |
380 |
_mm512_setzero_ps (), |
381 |
(__mmask16) -1);
|
382 |
} |
383 |
|
384 |
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
385 |
_mm_rsqrt14_ss(__m128 __A, __m128 __B) |
386 |
{ |
387 |
return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
|
388 |
(__v4sf) __B, |
389 |
(__v4sf) |
390 |
_mm_setzero_ps (), |
391 |
(__mmask8) -1);
|
392 |
} |
393 |
|
394 |
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
395 |
_mm_rsqrt14_sd(__m128d __A, __m128d __B) |
396 |
{ |
397 |
return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
|
398 |
(__v2df) __B, |
399 |
(__v2df) |
400 |
_mm_setzero_pd (), |
401 |
(__mmask8) -1);
|
402 |
} |
403 |
|
404 |
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
405 |
_mm512_rcp14_pd(__m512d __A) |
406 |
{ |
407 |
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
|
408 |
(__v8df) |
409 |
_mm512_setzero_pd (), |
410 |
(__mmask8) -1);
|
411 |
} |
412 |
|
413 |
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
414 |
_mm512_rcp14_ps(__m512 __A) |
415 |
{ |
416 |
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
|
417 |
(__v16sf) |
418 |
_mm512_setzero_ps (), |
419 |
(__mmask16) -1);
|
420 |
} |
421 |
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
422 |
_mm_rcp14_ss(__m128 __A, __m128 __B) |
423 |
{ |
424 |
return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
|
425 |
(__v4sf) __B, |
426 |
(__v4sf) |
427 |
_mm_setzero_ps (), |
428 |
(__mmask8) -1);
|
429 |
} |
430 |
|
431 |
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
432 |
_mm_rcp14_sd(__m128d __A, __m128d __B) |
433 |
{ |
434 |
return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
|
435 |
(__v2df) __B, |
436 |
(__v2df) |
437 |
_mm_setzero_pd (), |
438 |
(__mmask8) -1);
|
439 |
} |
440 |
|
441 |
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
442 |
_mm512_floor_ps(__m512 __A) |
443 |
{ |
444 |
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
|
445 |
_MM_FROUND_FLOOR, |
446 |
(__v16sf) __A, -1,
|
447 |
_MM_FROUND_CUR_DIRECTION); |
448 |
} |
449 |
|
450 |
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
451 |
_mm512_floor_pd(__m512d __A) |
452 |
{ |
453 |
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
|
454 |
_MM_FROUND_FLOOR, |
455 |
(__v8df) __A, -1,
|
456 |
_MM_FROUND_CUR_DIRECTION); |
457 |
} |
458 |
|
459 |
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
460 |
_mm512_ceil_ps(__m512 __A) |
461 |
{ |
462 |
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
|
463 |
_MM_FROUND_CEIL, |
464 |
(__v16sf) __A, -1,
|
465 |
_MM_FROUND_CUR_DIRECTION); |
466 |
} |
467 |
|
468 |
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
469 |
_mm512_ceil_pd(__m512d __A) |
470 |
{ |
471 |
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
|
472 |
_MM_FROUND_CEIL, |
473 |
(__v8df) __A, -1,
|
474 |
_MM_FROUND_CUR_DIRECTION); |
475 |
} |
476 |
|
477 |
static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
|
478 |
_mm512_abs_epi64(__m512i __A) |
479 |
{ |
480 |
return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
|
481 |
(__v8di) |
482 |
_mm512_setzero_si512 (), |
483 |
(__mmask8) -1);
|
484 |
} |
485 |
|
486 |
static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
|
487 |
_mm512_abs_epi32(__m512i __A) |
488 |
{ |
489 |
return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
|
490 |
(__v16si) |
491 |
_mm512_setzero_si512 (), |
492 |
(__mmask16) -1);
|
493 |
} |
494 |
|
495 |
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
496 |
_mm512_roundscale_ps(__m512 __A, const int __imm) |
497 |
{ |
498 |
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
|
499 |
(__v16sf) __A, -1,
|
500 |
_MM_FROUND_CUR_DIRECTION); |
501 |
} |
502 |
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
503 |
_mm512_roundscale_pd(__m512d __A, const int __imm) |
504 |
{ |
505 |
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
|
506 |
(__v8df) __A, -1,
|
507 |
_MM_FROUND_CUR_DIRECTION); |
508 |
} |
509 |
|
510 |
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
511 |
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) |
512 |
{ |
513 |
return (__m512d)
|
514 |
__builtin_ia32_vfmaddpd512_mask(__A, |
515 |
__B, |
516 |
__C, |
517 |
(__mmask8) -1,
|
518 |
_MM_FROUND_CUR_DIRECTION); |
519 |
} |
520 |
|
521 |
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
522 |
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) |
523 |
{ |
524 |
return (__m512d)
|
525 |
__builtin_ia32_vfmsubpd512_mask(__A, |
526 |
__B, |
527 |
__C, |
528 |
(__mmask8) -1,
|
529 |
_MM_FROUND_CUR_DIRECTION); |
530 |
} |
531 |
|
532 |
static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
533 |
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) |
534 |
{ |
535 |
return (__m512d)
|
536 |
__builtin_ia32_vfnmaddpd512_mask(__A, |
537 |
__B, |
538 |
__C, |
539 |
(__mmask8) -1,
|
540 |
_MM_FROUND_CUR_DIRECTION); |
541 |
} |
542 |
|
543 |
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
544 |
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) |
545 |
{ |
546 |
return (__m512)
|
547 |
__builtin_ia32_vfmaddps512_mask(__A, |
548 |
__B, |
549 |
__C, |
550 |
(__mmask16) -1,
|
551 |
_MM_FROUND_CUR_DIRECTION); |
552 |
} |
553 |
|
554 |
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
555 |
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) |
556 |
{ |
557 |
return (__m512)
|
558 |
__builtin_ia32_vfmsubps512_mask(__A, |
559 |
__B, |
560 |
__C, |
561 |
(__mmask16) -1,
|
562 |
_MM_FROUND_CUR_DIRECTION); |
563 |
} |
564 |
|
565 |
static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
566 |
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) |
567 |
{ |
568 |
return (__m512)
|
569 |
__builtin_ia32_vfnmaddps512_mask(__A, |
570 |
__B, |
571 |
__C, |
572 |
(__mmask16) -1,
|
573 |
_MM_FROUND_CUR_DIRECTION); |
574 |
} |
575 |
|
576 |
/* Vector permutations */
|
577 |
|
578 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
579 |
_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) |
580 |
{ |
581 |
return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
|
582 |
/* idx */ ,
|
583 |
(__v16si) __A, |
584 |
(__v16si) __B, |
585 |
(__mmask16) -1);
|
586 |
} |
587 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
588 |
_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) |
589 |
{ |
590 |
return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
|
591 |
/* idx */ ,
|
592 |
(__v8di) __A, |
593 |
(__v8di) __B, |
594 |
(__mmask8) -1);
|
595 |
} |
596 |
|
597 |
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
598 |
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) |
599 |
{ |
600 |
return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
|
601 |
/* idx */ ,
|
602 |
(__v8df) __A, |
603 |
(__v8df) __B, |
604 |
(__mmask8) -1);
|
605 |
} |
606 |
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
607 |
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) |
608 |
{ |
609 |
return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
|
610 |
/* idx */ ,
|
611 |
(__v16sf) __A, |
612 |
(__v16sf) __B, |
613 |
(__mmask16) -1);
|
614 |
} |
615 |
|
616 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
617 |
_mm512_valign_epi64(__m512i __A, __m512i __B, const int __I) |
618 |
{ |
619 |
return (__m512i) __builtin_ia32_alignq512_mask((__v8di)__A,
|
620 |
(__v8di)__B, |
621 |
__I, |
622 |
(__v8di)_mm512_setzero_si512(), |
623 |
(__mmask8) -1);
|
624 |
} |
625 |
|
626 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
627 |
_mm512_valign_epi32(__m512i __A, __m512i __B, const int __I) |
628 |
{ |
629 |
return (__m512i)__builtin_ia32_alignd512_mask((__v16si)__A,
|
630 |
(__v16si)__B, |
631 |
__I, |
632 |
(__v16si)_mm512_setzero_si512(), |
633 |
(__mmask16) -1);
|
634 |
} |
635 |
|
636 |
/* Vector Blend */
|
637 |
|
638 |
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
639 |
_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) |
640 |
{ |
641 |
return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
|
642 |
(__v8df) __W, |
643 |
(__mmask8) __U); |
644 |
} |
645 |
|
646 |
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
647 |
_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) |
648 |
{ |
649 |
return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
|
650 |
(__v16sf) __W, |
651 |
(__mmask16) __U); |
652 |
} |
653 |
|
654 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
655 |
_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) |
656 |
{ |
657 |
return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
|
658 |
(__v8di) __W, |
659 |
(__mmask8) __U); |
660 |
} |
661 |
|
662 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
663 |
_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) |
664 |
{ |
665 |
return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
|
666 |
(__v16si) __W, |
667 |
(__mmask16) __U); |
668 |
} |
669 |
|
670 |
/* Compare */
|
671 |
|
672 |
static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
|
673 |
_mm512_cmp_ps_mask(__m512 a, __m512 b, const int p) |
674 |
{ |
675 |
return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) a,
|
676 |
(__v16sf) b, p, (__mmask16) -1,
|
677 |
_MM_FROUND_CUR_DIRECTION); |
678 |
} |
679 |
|
680 |
static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
|
681 |
_mm512_cmp_pd_mask(__m512d __X, __m512d __Y, const int __P) |
682 |
{ |
683 |
return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
|
684 |
(__v8df) __Y, __P, |
685 |
(__mmask8) -1,
|
686 |
_MM_FROUND_CUR_DIRECTION); |
687 |
} |
688 |
|
689 |
/* Conversion */
|
690 |
|
691 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
692 |
_mm512_cvttps_epu32(__m512 __A) |
693 |
{ |
694 |
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
|
695 |
(__v16si) |
696 |
_mm512_setzero_si512 (), |
697 |
(__mmask16) -1,
|
698 |
_MM_FROUND_CUR_DIRECTION); |
699 |
} |
700 |
|
701 |
static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
|
702 |
_mm512_cvt_roundepi32_ps(__m512i __A, const int __R) |
703 |
{ |
704 |
return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
|
705 |
(__v16sf) |
706 |
_mm512_setzero_ps (), |
707 |
(__mmask16) -1,
|
708 |
__R); |
709 |
} |
710 |
|
711 |
static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
|
712 |
_mm512_cvt_roundepu32_ps(__m512i __A, const int __R) |
713 |
{ |
714 |
return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
|
715 |
(__v16sf) |
716 |
_mm512_setzero_ps (), |
717 |
(__mmask16) -1,
|
718 |
__R); |
719 |
} |
720 |
|
721 |
static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
|
722 |
_mm512_cvtepi32_pd(__m256i __A) |
723 |
{ |
724 |
return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
|
725 |
(__v8df) |
726 |
_mm512_setzero_pd (), |
727 |
(__mmask8) -1);
|
728 |
} |
729 |
|
730 |
static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
|
731 |
_mm512_cvtepu32_pd(__m256i __A) |
732 |
{ |
733 |
return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
|
734 |
(__v8df) |
735 |
_mm512_setzero_pd (), |
736 |
(__mmask8) -1);
|
737 |
} |
738 |
static __inline __m256 __attribute__ (( __always_inline__, __nodebug__))
|
739 |
_mm512_cvt_roundpd_ps(__m512d __A, const int __R) |
740 |
{ |
741 |
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
|
742 |
(__v8sf) |
743 |
_mm256_setzero_ps (), |
744 |
(__mmask8) -1,
|
745 |
__R); |
746 |
} |
747 |
|
748 |
static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
|
749 |
_mm512_cvtps_ph(__m512 __A, const int __I) |
750 |
{ |
751 |
return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
|
752 |
__I, |
753 |
(__v16hi) |
754 |
_mm256_setzero_si256 (), |
755 |
-1);
|
756 |
} |
757 |
|
758 |
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
759 |
_mm512_cvtph_ps(__m256i __A) |
760 |
{ |
761 |
return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
|
762 |
(__v16sf) |
763 |
_mm512_setzero_ps (), |
764 |
(__mmask16) -1,
|
765 |
_MM_FROUND_CUR_DIRECTION); |
766 |
} |
767 |
|
768 |
static __inline __m512i __attribute__((__always_inline__, __nodebug__))
|
769 |
_mm512_cvttps_epi32(__m512 a) |
770 |
{ |
771 |
return (__m512i)
|
772 |
__builtin_ia32_cvttps2dq512_mask((__v16sf) a, |
773 |
(__v16si) _mm512_setzero_si512 (), |
774 |
(__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
|
775 |
} |
776 |
|
777 |
static __inline __m256i __attribute__((__always_inline__, __nodebug__))
|
778 |
_mm512_cvttpd_epi32(__m512d a) |
779 |
{ |
780 |
return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
|
781 |
(__v8si)_mm256_setzero_si256(), |
782 |
(__mmask8) -1,
|
783 |
_MM_FROUND_CUR_DIRECTION); |
784 |
} |
785 |
|
786 |
static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
|
787 |
_mm512_cvtt_roundpd_epi32(__m512d __A, const int __R) |
788 |
{ |
789 |
return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
|
790 |
(__v8si) |
791 |
_mm256_setzero_si256 (), |
792 |
(__mmask8) -1,
|
793 |
__R); |
794 |
} |
795 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
796 |
_mm512_cvtt_roundps_epi32(__m512 __A, const int __R) |
797 |
{ |
798 |
return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
|
799 |
(__v16si) |
800 |
_mm512_setzero_si512 (), |
801 |
(__mmask16) -1,
|
802 |
__R); |
803 |
} |
804 |
|
805 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
806 |
_mm512_cvt_roundps_epi32(__m512 __A, const int __R) |
807 |
{ |
808 |
return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
|
809 |
(__v16si) |
810 |
_mm512_setzero_si512 (), |
811 |
(__mmask16) -1,
|
812 |
__R); |
813 |
} |
814 |
static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
|
815 |
_mm512_cvt_roundpd_epi32(__m512d __A, const int __R) |
816 |
{ |
817 |
return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
|
818 |
(__v8si) |
819 |
_mm256_setzero_si256 (), |
820 |
(__mmask8) -1,
|
821 |
__R); |
822 |
} |
823 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
824 |
_mm512_cvt_roundps_epu32(__m512 __A, const int __R) |
825 |
{ |
826 |
return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
|
827 |
(__v16si) |
828 |
_mm512_setzero_si512 (), |
829 |
(__mmask16) -1,
|
830 |
__R); |
831 |
} |
832 |
static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
|
833 |
_mm512_cvt_roundpd_epu32(__m512d __A, const int __R) |
834 |
{ |
835 |
return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
|
836 |
(__v8si) |
837 |
_mm256_setzero_si256 (), |
838 |
(__mmask8) -1,
|
839 |
__R); |
840 |
} |
841 |
|
842 |
/* Unpack and Interleave */
|
843 |
static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
844 |
_mm512_unpackhi_pd(__m512d __a, __m512d __b) |
845 |
{ |
846 |
return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); |
847 |
} |
848 |
|
849 |
static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
850 |
_mm512_unpacklo_pd(__m512d __a, __m512d __b) |
851 |
{ |
852 |
return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); |
853 |
} |
854 |
|
855 |
static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
856 |
_mm512_unpackhi_ps(__m512 __a, __m512 __b) |
857 |
{ |
858 |
return __builtin_shufflevector(__a, __b,
|
859 |
2, 18, 3, 19, |
860 |
2+4, 18+4, 3+4, 19+4, |
861 |
2+8, 18+8, 3+8, 19+8, |
862 |
2+12, 18+12, 3+12, 19+12); |
863 |
} |
864 |
|
865 |
static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
866 |
_mm512_unpacklo_ps(__m512 __a, __m512 __b) |
867 |
{ |
868 |
return __builtin_shufflevector(__a, __b,
|
869 |
0, 16, 1, 17, |
870 |
0+4, 16+4, 1+4, 17+4, |
871 |
0+8, 16+8, 1+8, 17+8, |
872 |
0+12, 16+12, 1+12, 17+12); |
873 |
} |
874 |
|
875 |
/* Bit Test */
|
876 |
|
877 |
static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
|
878 |
_mm512_test_epi32_mask(__m512i __A, __m512i __B) |
879 |
{ |
880 |
return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
|
881 |
(__v16si) __B, |
882 |
(__mmask16) -1);
|
883 |
} |
884 |
|
885 |
static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
|
886 |
_mm512_test_epi64_mask(__m512i __A, __m512i __B) |
887 |
{ |
888 |
return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
|
889 |
(__v8di) __B, |
890 |
(__mmask8) -1);
|
891 |
} |
892 |
|
893 |
/* SIMD load ops */
|
894 |
|
895 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
896 |
_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) |
897 |
{ |
898 |
return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P, |
899 |
(__v16si) |
900 |
_mm512_setzero_si512 (), |
901 |
(__mmask16) __U); |
902 |
} |
903 |
|
904 |
static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
905 |
_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) |
906 |
{ |
907 |
return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P, |
908 |
(__v8di) |
909 |
_mm512_setzero_si512 (), |
910 |
(__mmask8) __U); |
911 |
} |
912 |
|
913 |
static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
914 |
_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) |
915 |
{ |
916 |
return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P, |
917 |
(__v16sf) |
918 |
_mm512_setzero_ps (), |
919 |
(__mmask16) __U); |
920 |
} |
921 |
|
922 |
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
923 |
_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) |
924 |
{ |
925 |
return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P, |
926 |
(__v8df) |
927 |
_mm512_setzero_pd (), |
928 |
(__mmask8) __U); |
929 |
} |
930 |
|
931 |
static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
932 |
_mm512_loadu_pd(double const *__p) |
933 |
{ |
934 |
struct __loadu_pd {
|
935 |
__m512d __v; |
936 |
} __attribute__((packed, may_alias)); |
937 |
return ((struct __loadu_pd*)__p)->__v; |
938 |
} |
939 |
|
940 |
static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
941 |
_mm512_loadu_ps(float const *__p) |
942 |
{ |
943 |
struct __loadu_ps {
|
944 |
__m512 __v; |
945 |
} __attribute__((packed, may_alias)); |
946 |
return ((struct __loadu_ps*)__p)->__v; |
947 |
} |
948 |
|
949 |
/* SIMD store ops */
|
950 |
|
951 |
static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
952 |
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
|
953 |
{ |
954 |
__builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A, |
955 |
(__mmask8) __U); |
956 |
} |
957 |
|
958 |
static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
959 |
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
|
960 |
{ |
961 |
__builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A, |
962 |
(__mmask16) __U); |
963 |
} |
964 |
|
965 |
static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
966 |
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
|
967 |
{ |
968 |
__builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); |
969 |
} |
970 |
|
971 |
static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
972 |
_mm512_storeu_pd(void *__P, __m512d __A)
|
973 |
{ |
974 |
__builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
|
975 |
} |
976 |
|
977 |
static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
978 |
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
|
979 |
{ |
980 |
__builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A, |
981 |
(__mmask16) __U); |
982 |
} |
983 |
|
984 |
static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
985 |
_mm512_storeu_ps(void *__P, __m512 __A)
|
986 |
{ |
987 |
__builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
|
988 |
} |
989 |
|
990 |
static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
991 |
_mm512_store_ps(void *__P, __m512 __A)
|
992 |
{ |
993 |
*(__m512*)__P = __A; |
994 |
} |
995 |
|
996 |
static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
997 |
_mm512_store_pd(void *__P, __m512d __A)
|
998 |
{ |
999 |
*(__m512d*)__P = __A; |
1000 |
} |
1001 |
|
1002 |
/* Mask ops */
|
1003 |
|
1004 |
static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
|
1005 |
_mm512_knot(__mmask16 __M) |
1006 |
{ |
1007 |
return __builtin_ia32_knothi(__M);
|
1008 |
} |
1009 |
|
1010 |
/* Integer compare */
|
1011 |
|
1012 |
static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
|
1013 |
_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) { |
1014 |
return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
|
1015 |
(__mmask16)-1);
|
1016 |
} |
1017 |
|
1018 |
static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
|
1019 |
_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { |
1020 |
return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
|
1021 |
__u); |
1022 |
} |
1023 |
|
1024 |
static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
|
1025 |
_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { |
1026 |
return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
|
1027 |
__u); |
1028 |
} |
1029 |
|
1030 |
static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
|
1031 |
_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { |
1032 |
return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
|
1033 |
(__mmask8)-1);
|
1034 |
} |
1035 |
|
1036 |
#endif // __AVX512FINTRIN_H |