root / lab4 / .minix-src / include / clang-3.6 / avx512fintrin.h @ 14
History | View | Annotate | Download (34.5 KB)
1 | 13 | up20180614 | /*===---- avx512fintrin.h - AVX2 intrinsics --------------------------------===
|
---|---|---|---|
2 | *
|
||
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy
|
||
4 | * of this software and associated documentation files (the "Software"), to deal
|
||
5 | * in the Software without restriction, including without limitation the rights
|
||
6 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||
7 | * copies of the Software, and to permit persons to whom the Software is
|
||
8 | * furnished to do so, subject to the following conditions:
|
||
9 | *
|
||
10 | * The above copyright notice and this permission notice shall be included in
|
||
11 | * all copies or substantial portions of the Software.
|
||
12 | *
|
||
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||
16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||
17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||
18 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||
19 | * THE SOFTWARE.
|
||
20 | *
|
||
21 | *===-----------------------------------------------------------------------===
|
||
22 | */
|
||
23 | #ifndef __IMMINTRIN_H
|
||
24 | #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." |
||
25 | #endif
|
||
26 | |||
27 | #ifndef __AVX512FINTRIN_H
|
||
28 | #define __AVX512FINTRIN_H
|
||
29 | |||
30 | typedef double __v8df __attribute__((__vector_size__(64))); |
||
31 | typedef float __v16sf __attribute__((__vector_size__(64))); |
||
32 | typedef long long __v8di __attribute__((__vector_size__(64))); |
||
33 | typedef int __v16si __attribute__((__vector_size__(64))); |
||
34 | |||
35 | typedef float __m512 __attribute__((__vector_size__(64))); |
||
36 | typedef double __m512d __attribute__((__vector_size__(64))); |
||
37 | typedef long long __m512i __attribute__((__vector_size__(64))); |
||
38 | |||
39 | typedef unsigned char __mmask8; |
||
40 | typedef unsigned short __mmask16; |
||
41 | |||
42 | /* Rounding mode macros. */
|
||
43 | #define _MM_FROUND_TO_NEAREST_INT 0x00 |
||
44 | #define _MM_FROUND_TO_NEG_INF 0x01 |
||
45 | #define _MM_FROUND_TO_POS_INF 0x02 |
||
46 | #define _MM_FROUND_TO_ZERO 0x03 |
||
47 | #define _MM_FROUND_CUR_DIRECTION 0x04 |
||
48 | |||
49 | /* Create vectors with repeated elements */
|
||
50 | |||
51 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
52 | _mm512_setzero_si512(void)
|
||
53 | { |
||
54 | return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; |
||
55 | } |
||
56 | |||
57 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
58 | _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
|
||
59 | { |
||
60 | return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
|
||
61 | (__v16si) |
||
62 | _mm512_setzero_si512 (), |
||
63 | __M); |
||
64 | } |
||
65 | |||
66 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
67 | _mm512_maskz_set1_epi64(__mmask8 __M, long long __A) |
||
68 | { |
||
69 | #ifdef __x86_64__
|
||
70 | return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
|
||
71 | (__v8di) |
||
72 | _mm512_setzero_si512 (), |
||
73 | __M); |
||
74 | #else
|
||
75 | return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
|
||
76 | (__v8di) |
||
77 | _mm512_setzero_si512 (), |
||
78 | __M); |
||
79 | #endif
|
||
80 | } |
||
81 | |||
82 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||
83 | _mm512_setzero_ps(void)
|
||
84 | { |
||
85 | return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, |
||
86 | 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; |
||
87 | } |
||
88 | static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
||
89 | _mm512_setzero_pd(void)
|
||
90 | { |
||
91 | return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; |
||
92 | } |
||
93 | |||
94 | static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
||
95 | _mm512_set1_ps(float __w)
|
||
96 | { |
||
97 | return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
|
||
98 | __w, __w, __w, __w, __w, __w, __w, __w }; |
||
99 | } |
||
100 | |||
101 | static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
||
102 | _mm512_set1_pd(double __w)
|
||
103 | { |
||
104 | return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
|
||
105 | } |
||
106 | |||
107 | static __inline __m512i __attribute__((__always_inline__, __nodebug__))
|
||
108 | _mm512_set1_epi32(int __s)
|
||
109 | { |
||
110 | return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
|
||
111 | __s, __s, __s, __s, __s, __s, __s, __s }; |
||
112 | } |
||
113 | |||
114 | static __inline __m512i __attribute__((__always_inline__, __nodebug__))
|
||
115 | _mm512_set1_epi64(long long __d) |
||
116 | { |
||
117 | return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
|
||
118 | } |
||
119 | |||
120 | static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||
121 | _mm512_broadcastss_ps(__m128 __X) |
||
122 | { |
||
123 | float __f = __X[0]; |
||
124 | return (__v16sf){ __f, __f, __f, __f,
|
||
125 | __f, __f, __f, __f, |
||
126 | __f, __f, __f, __f, |
||
127 | __f, __f, __f, __f }; |
||
128 | } |
||
129 | |||
130 | static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||
131 | _mm512_broadcastsd_pd(__m128d __X) |
||
132 | { |
||
133 | double __d = __X[0]; |
||
134 | return (__v8df){ __d, __d, __d, __d,
|
||
135 | __d, __d, __d, __d }; |
||
136 | } |
||
137 | |||
138 | /* Cast between vector types */
|
||
139 | |||
140 | static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
||
141 | _mm512_castpd256_pd512(__m256d __a) |
||
142 | { |
||
143 | return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); |
||
144 | } |
||
145 | |||
146 | static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
||
147 | _mm512_castps256_ps512(__m256 __a) |
||
148 | { |
||
149 | return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, |
||
150 | -1, -1, -1, -1, -1, -1, -1, -1); |
||
151 | } |
||
152 | |||
153 | static __inline __m128d __attribute__((__always_inline__, __nodebug__))
|
||
154 | _mm512_castpd512_pd128(__m512d __a) |
||
155 | { |
||
156 | return __builtin_shufflevector(__a, __a, 0, 1); |
||
157 | } |
||
158 | |||
159 | static __inline __m128 __attribute__((__always_inline__, __nodebug__))
|
||
160 | _mm512_castps512_ps128(__m512 __a) |
||
161 | { |
||
162 | return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); |
||
163 | } |
||
164 | |||
165 | /* Arithmetic */
|
||
166 | |||
167 | static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
||
168 | _mm512_add_pd(__m512d __a, __m512d __b) |
||
169 | { |
||
170 | return __a + __b;
|
||
171 | } |
||
172 | |||
173 | static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
||
174 | _mm512_add_ps(__m512 __a, __m512 __b) |
||
175 | { |
||
176 | return __a + __b;
|
||
177 | } |
||
178 | |||
179 | static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
||
180 | _mm512_mul_pd(__m512d __a, __m512d __b) |
||
181 | { |
||
182 | return __a * __b;
|
||
183 | } |
||
184 | |||
185 | static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
||
186 | _mm512_mul_ps(__m512 __a, __m512 __b) |
||
187 | { |
||
188 | return __a * __b;
|
||
189 | } |
||
190 | |||
191 | static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
||
192 | _mm512_sub_pd(__m512d __a, __m512d __b) |
||
193 | { |
||
194 | return __a - __b;
|
||
195 | } |
||
196 | |||
197 | static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
||
198 | _mm512_sub_ps(__m512 __a, __m512 __b) |
||
199 | { |
||
200 | return __a - __b;
|
||
201 | } |
||
202 | |||
203 | static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||
204 | _mm512_max_pd(__m512d __A, __m512d __B) |
||
205 | { |
||
206 | return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
|
||
207 | (__v8df) __B, |
||
208 | (__v8df) |
||
209 | _mm512_setzero_pd (), |
||
210 | (__mmask8) -1,
|
||
211 | _MM_FROUND_CUR_DIRECTION); |
||
212 | } |
||
213 | |||
214 | static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||
215 | _mm512_max_ps(__m512 __A, __m512 __B) |
||
216 | { |
||
217 | return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
|
||
218 | (__v16sf) __B, |
||
219 | (__v16sf) |
||
220 | _mm512_setzero_ps (), |
||
221 | (__mmask16) -1,
|
||
222 | _MM_FROUND_CUR_DIRECTION); |
||
223 | } |
||
224 | |||
225 | static __inline __m512i
|
||
226 | __attribute__ ((__always_inline__, __nodebug__)) |
||
227 | _mm512_max_epi32(__m512i __A, __m512i __B) |
||
228 | { |
||
229 | return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
|
||
230 | (__v16si) __B, |
||
231 | (__v16si) |
||
232 | _mm512_setzero_si512 (), |
||
233 | (__mmask16) -1);
|
||
234 | } |
||
235 | |||
236 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
237 | _mm512_max_epu32(__m512i __A, __m512i __B) |
||
238 | { |
||
239 | return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
|
||
240 | (__v16si) __B, |
||
241 | (__v16si) |
||
242 | _mm512_setzero_si512 (), |
||
243 | (__mmask16) -1);
|
||
244 | } |
||
245 | |||
246 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
247 | _mm512_max_epi64(__m512i __A, __m512i __B) |
||
248 | { |
||
249 | return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
|
||
250 | (__v8di) __B, |
||
251 | (__v8di) |
||
252 | _mm512_setzero_si512 (), |
||
253 | (__mmask8) -1);
|
||
254 | } |
||
255 | |||
256 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
257 | _mm512_max_epu64(__m512i __A, __m512i __B) |
||
258 | { |
||
259 | return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
|
||
260 | (__v8di) __B, |
||
261 | (__v8di) |
||
262 | _mm512_setzero_si512 (), |
||
263 | (__mmask8) -1);
|
||
264 | } |
||
265 | |||
266 | static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||
267 | _mm512_min_pd(__m512d __A, __m512d __B) |
||
268 | { |
||
269 | return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
|
||
270 | (__v8df) __B, |
||
271 | (__v8df) |
||
272 | _mm512_setzero_pd (), |
||
273 | (__mmask8) -1,
|
||
274 | _MM_FROUND_CUR_DIRECTION); |
||
275 | } |
||
276 | |||
277 | static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||
278 | _mm512_min_ps(__m512 __A, __m512 __B) |
||
279 | { |
||
280 | return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
|
||
281 | (__v16sf) __B, |
||
282 | (__v16sf) |
||
283 | _mm512_setzero_ps (), |
||
284 | (__mmask16) -1,
|
||
285 | _MM_FROUND_CUR_DIRECTION); |
||
286 | } |
||
287 | |||
288 | static __inline __m512i
|
||
289 | __attribute__ ((__always_inline__, __nodebug__)) |
||
290 | _mm512_min_epi32(__m512i __A, __m512i __B) |
||
291 | { |
||
292 | return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
|
||
293 | (__v16si) __B, |
||
294 | (__v16si) |
||
295 | _mm512_setzero_si512 (), |
||
296 | (__mmask16) -1);
|
||
297 | } |
||
298 | |||
299 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
300 | _mm512_min_epu32(__m512i __A, __m512i __B) |
||
301 | { |
||
302 | return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
|
||
303 | (__v16si) __B, |
||
304 | (__v16si) |
||
305 | _mm512_setzero_si512 (), |
||
306 | (__mmask16) -1);
|
||
307 | } |
||
308 | |||
309 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
310 | _mm512_min_epi64(__m512i __A, __m512i __B) |
||
311 | { |
||
312 | return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
|
||
313 | (__v8di) __B, |
||
314 | (__v8di) |
||
315 | _mm512_setzero_si512 (), |
||
316 | (__mmask8) -1);
|
||
317 | } |
||
318 | |||
319 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
320 | _mm512_min_epu64(__m512i __A, __m512i __B) |
||
321 | { |
||
322 | return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
|
||
323 | (__v8di) __B, |
||
324 | (__v8di) |
||
325 | _mm512_setzero_si512 (), |
||
326 | (__mmask8) -1);
|
||
327 | } |
||
328 | |||
329 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
330 | _mm512_mul_epi32(__m512i __X, __m512i __Y) |
||
331 | { |
||
332 | return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
|
||
333 | (__v16si) __Y, |
||
334 | (__v8di) |
||
335 | _mm512_setzero_si512 (), |
||
336 | (__mmask8) -1);
|
||
337 | } |
||
338 | |||
339 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
340 | _mm512_mul_epu32(__m512i __X, __m512i __Y) |
||
341 | { |
||
342 | return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
|
||
343 | (__v16si) __Y, |
||
344 | (__v8di) |
||
345 | _mm512_setzero_si512 (), |
||
346 | (__mmask8) -1);
|
||
347 | } |
||
348 | |||
349 | static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||
350 | _mm512_sqrt_pd(__m512d a) |
||
351 | { |
||
352 | return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)a,
|
||
353 | (__v8df) _mm512_setzero_pd (), |
||
354 | (__mmask8) -1,
|
||
355 | _MM_FROUND_CUR_DIRECTION); |
||
356 | } |
||
357 | |||
358 | static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||
359 | _mm512_sqrt_ps(__m512 a) |
||
360 | { |
||
361 | return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)a,
|
||
362 | (__v16sf) _mm512_setzero_ps (), |
||
363 | (__mmask16) -1,
|
||
364 | _MM_FROUND_CUR_DIRECTION); |
||
365 | } |
||
366 | |||
367 | static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||
368 | _mm512_rsqrt14_pd(__m512d __A) |
||
369 | { |
||
370 | return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
|
||
371 | (__v8df) |
||
372 | _mm512_setzero_pd (), |
||
373 | (__mmask8) -1);}
|
||
374 | |||
375 | static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||
376 | _mm512_rsqrt14_ps(__m512 __A) |
||
377 | { |
||
378 | return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
|
||
379 | (__v16sf) |
||
380 | _mm512_setzero_ps (), |
||
381 | (__mmask16) -1);
|
||
382 | } |
||
383 | |||
384 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
385 | _mm_rsqrt14_ss(__m128 __A, __m128 __B) |
||
386 | { |
||
387 | return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
|
||
388 | (__v4sf) __B, |
||
389 | (__v4sf) |
||
390 | _mm_setzero_ps (), |
||
391 | (__mmask8) -1);
|
||
392 | } |
||
393 | |||
394 | static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||
395 | _mm_rsqrt14_sd(__m128d __A, __m128d __B) |
||
396 | { |
||
397 | return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
|
||
398 | (__v2df) __B, |
||
399 | (__v2df) |
||
400 | _mm_setzero_pd (), |
||
401 | (__mmask8) -1);
|
||
402 | } |
||
403 | |||
404 | static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||
405 | _mm512_rcp14_pd(__m512d __A) |
||
406 | { |
||
407 | return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
|
||
408 | (__v8df) |
||
409 | _mm512_setzero_pd (), |
||
410 | (__mmask8) -1);
|
||
411 | } |
||
412 | |||
413 | static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||
414 | _mm512_rcp14_ps(__m512 __A) |
||
415 | { |
||
416 | return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
|
||
417 | (__v16sf) |
||
418 | _mm512_setzero_ps (), |
||
419 | (__mmask16) -1);
|
||
420 | } |
||
421 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
422 | _mm_rcp14_ss(__m128 __A, __m128 __B) |
||
423 | { |
||
424 | return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
|
||
425 | (__v4sf) __B, |
||
426 | (__v4sf) |
||
427 | _mm_setzero_ps (), |
||
428 | (__mmask8) -1);
|
||
429 | } |
||
430 | |||
431 | static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||
432 | _mm_rcp14_sd(__m128d __A, __m128d __B) |
||
433 | { |
||
434 | return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
|
||
435 | (__v2df) __B, |
||
436 | (__v2df) |
||
437 | _mm_setzero_pd (), |
||
438 | (__mmask8) -1);
|
||
439 | } |
||
440 | |||
441 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||
442 | _mm512_floor_ps(__m512 __A) |
||
443 | { |
||
444 | return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
|
||
445 | _MM_FROUND_FLOOR, |
||
446 | (__v16sf) __A, -1,
|
||
447 | _MM_FROUND_CUR_DIRECTION); |
||
448 | } |
||
449 | |||
450 | static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
||
451 | _mm512_floor_pd(__m512d __A) |
||
452 | { |
||
453 | return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
|
||
454 | _MM_FROUND_FLOOR, |
||
455 | (__v8df) __A, -1,
|
||
456 | _MM_FROUND_CUR_DIRECTION); |
||
457 | } |
||
458 | |||
459 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||
460 | _mm512_ceil_ps(__m512 __A) |
||
461 | { |
||
462 | return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
|
||
463 | _MM_FROUND_CEIL, |
||
464 | (__v16sf) __A, -1,
|
||
465 | _MM_FROUND_CUR_DIRECTION); |
||
466 | } |
||
467 | |||
468 | static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
||
469 | _mm512_ceil_pd(__m512d __A) |
||
470 | { |
||
471 | return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
|
||
472 | _MM_FROUND_CEIL, |
||
473 | (__v8df) __A, -1,
|
||
474 | _MM_FROUND_CUR_DIRECTION); |
||
475 | } |
||
476 | |||
477 | static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
|
||
478 | _mm512_abs_epi64(__m512i __A) |
||
479 | { |
||
480 | return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
|
||
481 | (__v8di) |
||
482 | _mm512_setzero_si512 (), |
||
483 | (__mmask8) -1);
|
||
484 | } |
||
485 | |||
486 | static __inline __m512i __attribute__ (( __always_inline__, __nodebug__))
|
||
487 | _mm512_abs_epi32(__m512i __A) |
||
488 | { |
||
489 | return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
|
||
490 | (__v16si) |
||
491 | _mm512_setzero_si512 (), |
||
492 | (__mmask16) -1);
|
||
493 | } |
||
494 | |||
495 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||
496 | _mm512_roundscale_ps(__m512 __A, const int __imm) |
||
497 | { |
||
498 | return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
|
||
499 | (__v16sf) __A, -1,
|
||
500 | _MM_FROUND_CUR_DIRECTION); |
||
501 | } |
||
502 | static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
||
503 | _mm512_roundscale_pd(__m512d __A, const int __imm) |
||
504 | { |
||
505 | return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
|
||
506 | (__v8df) __A, -1,
|
||
507 | _MM_FROUND_CUR_DIRECTION); |
||
508 | } |
||
509 | |||
510 | static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||
511 | _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) |
||
512 | { |
||
513 | return (__m512d)
|
||
514 | __builtin_ia32_vfmaddpd512_mask(__A, |
||
515 | __B, |
||
516 | __C, |
||
517 | (__mmask8) -1,
|
||
518 | _MM_FROUND_CUR_DIRECTION); |
||
519 | } |
||
520 | |||
521 | static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||
522 | _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) |
||
523 | { |
||
524 | return (__m512d)
|
||
525 | __builtin_ia32_vfmsubpd512_mask(__A, |
||
526 | __B, |
||
527 | __C, |
||
528 | (__mmask8) -1,
|
||
529 | _MM_FROUND_CUR_DIRECTION); |
||
530 | } |
||
531 | |||
532 | static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
|
||
533 | _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) |
||
534 | { |
||
535 | return (__m512d)
|
||
536 | __builtin_ia32_vfnmaddpd512_mask(__A, |
||
537 | __B, |
||
538 | __C, |
||
539 | (__mmask8) -1,
|
||
540 | _MM_FROUND_CUR_DIRECTION); |
||
541 | } |
||
542 | |||
543 | static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||
544 | _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) |
||
545 | { |
||
546 | return (__m512)
|
||
547 | __builtin_ia32_vfmaddps512_mask(__A, |
||
548 | __B, |
||
549 | __C, |
||
550 | (__mmask16) -1,
|
||
551 | _MM_FROUND_CUR_DIRECTION); |
||
552 | } |
||
553 | |||
554 | static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||
555 | _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) |
||
556 | { |
||
557 | return (__m512)
|
||
558 | __builtin_ia32_vfmsubps512_mask(__A, |
||
559 | __B, |
||
560 | __C, |
||
561 | (__mmask16) -1,
|
||
562 | _MM_FROUND_CUR_DIRECTION); |
||
563 | } |
||
564 | |||
565 | static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
|
||
566 | _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) |
||
567 | { |
||
568 | return (__m512)
|
||
569 | __builtin_ia32_vfnmaddps512_mask(__A, |
||
570 | __B, |
||
571 | __C, |
||
572 | (__mmask16) -1,
|
||
573 | _MM_FROUND_CUR_DIRECTION); |
||
574 | } |
||
575 | |||
576 | /* Vector permutations */
|
||
577 | |||
578 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
579 | _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) |
||
580 | { |
||
581 | return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
|
||
582 | /* idx */ ,
|
||
583 | (__v16si) __A, |
||
584 | (__v16si) __B, |
||
585 | (__mmask16) -1);
|
||
586 | } |
||
587 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
588 | _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) |
||
589 | { |
||
590 | return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
|
||
591 | /* idx */ ,
|
||
592 | (__v8di) __A, |
||
593 | (__v8di) __B, |
||
594 | (__mmask8) -1);
|
||
595 | } |
||
596 | |||
597 | static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
||
598 | _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) |
||
599 | { |
||
600 | return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
|
||
601 | /* idx */ ,
|
||
602 | (__v8df) __A, |
||
603 | (__v8df) __B, |
||
604 | (__mmask8) -1);
|
||
605 | } |
||
606 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||
607 | _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) |
||
608 | { |
||
609 | return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
|
||
610 | /* idx */ ,
|
||
611 | (__v16sf) __A, |
||
612 | (__v16sf) __B, |
||
613 | (__mmask16) -1);
|
||
614 | } |
||
615 | |||
616 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
617 | _mm512_valign_epi64(__m512i __A, __m512i __B, const int __I) |
||
618 | { |
||
619 | return (__m512i) __builtin_ia32_alignq512_mask((__v8di)__A,
|
||
620 | (__v8di)__B, |
||
621 | __I, |
||
622 | (__v8di)_mm512_setzero_si512(), |
||
623 | (__mmask8) -1);
|
||
624 | } |
||
625 | |||
626 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
627 | _mm512_valign_epi32(__m512i __A, __m512i __B, const int __I) |
||
628 | { |
||
629 | return (__m512i)__builtin_ia32_alignd512_mask((__v16si)__A,
|
||
630 | (__v16si)__B, |
||
631 | __I, |
||
632 | (__v16si)_mm512_setzero_si512(), |
||
633 | (__mmask16) -1);
|
||
634 | } |
||
635 | |||
636 | /* Vector Blend */
|
||
637 | |||
638 | static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
||
639 | _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) |
||
640 | { |
||
641 | return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
|
||
642 | (__v8df) __W, |
||
643 | (__mmask8) __U); |
||
644 | } |
||
645 | |||
646 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||
647 | _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) |
||
648 | { |
||
649 | return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
|
||
650 | (__v16sf) __W, |
||
651 | (__mmask16) __U); |
||
652 | } |
||
653 | |||
654 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
655 | _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) |
||
656 | { |
||
657 | return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
|
||
658 | (__v8di) __W, |
||
659 | (__mmask8) __U); |
||
660 | } |
||
661 | |||
662 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
663 | _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) |
||
664 | { |
||
665 | return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
|
||
666 | (__v16si) __W, |
||
667 | (__mmask16) __U); |
||
668 | } |
||
669 | |||
670 | /* Compare */
|
||
671 | |||
672 | static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
|
||
673 | _mm512_cmp_ps_mask(__m512 a, __m512 b, const int p) |
||
674 | { |
||
675 | return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) a,
|
||
676 | (__v16sf) b, p, (__mmask16) -1,
|
||
677 | _MM_FROUND_CUR_DIRECTION); |
||
678 | } |
||
679 | |||
680 | static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
|
||
681 | _mm512_cmp_pd_mask(__m512d __X, __m512d __Y, const int __P) |
||
682 | { |
||
683 | return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
|
||
684 | (__v8df) __Y, __P, |
||
685 | (__mmask8) -1,
|
||
686 | _MM_FROUND_CUR_DIRECTION); |
||
687 | } |
||
688 | |||
689 | /* Conversion */
|
||
690 | |||
691 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
692 | _mm512_cvttps_epu32(__m512 __A) |
||
693 | { |
||
694 | return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
|
||
695 | (__v16si) |
||
696 | _mm512_setzero_si512 (), |
||
697 | (__mmask16) -1,
|
||
698 | _MM_FROUND_CUR_DIRECTION); |
||
699 | } |
||
700 | |||
701 | static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
|
||
702 | _mm512_cvt_roundepi32_ps(__m512i __A, const int __R) |
||
703 | { |
||
704 | return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
|
||
705 | (__v16sf) |
||
706 | _mm512_setzero_ps (), |
||
707 | (__mmask16) -1,
|
||
708 | __R); |
||
709 | } |
||
710 | |||
711 | static __inline __m512 __attribute__ (( __always_inline__, __nodebug__))
|
||
712 | _mm512_cvt_roundepu32_ps(__m512i __A, const int __R) |
||
713 | { |
||
714 | return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
|
||
715 | (__v16sf) |
||
716 | _mm512_setzero_ps (), |
||
717 | (__mmask16) -1,
|
||
718 | __R); |
||
719 | } |
||
720 | |||
721 | static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
|
||
722 | _mm512_cvtepi32_pd(__m256i __A) |
||
723 | { |
||
724 | return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
|
||
725 | (__v8df) |
||
726 | _mm512_setzero_pd (), |
||
727 | (__mmask8) -1);
|
||
728 | } |
||
729 | |||
730 | static __inline __m512d __attribute__ (( __always_inline__, __nodebug__))
|
||
731 | _mm512_cvtepu32_pd(__m256i __A) |
||
732 | { |
||
733 | return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
|
||
734 | (__v8df) |
||
735 | _mm512_setzero_pd (), |
||
736 | (__mmask8) -1);
|
||
737 | } |
||
738 | static __inline __m256 __attribute__ (( __always_inline__, __nodebug__))
|
||
739 | _mm512_cvt_roundpd_ps(__m512d __A, const int __R) |
||
740 | { |
||
741 | return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
|
||
742 | (__v8sf) |
||
743 | _mm256_setzero_ps (), |
||
744 | (__mmask8) -1,
|
||
745 | __R); |
||
746 | } |
||
747 | |||
748 | static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
|
||
749 | _mm512_cvtps_ph(__m512 __A, const int __I) |
||
750 | { |
||
751 | return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
|
||
752 | __I, |
||
753 | (__v16hi) |
||
754 | _mm256_setzero_si256 (), |
||
755 | -1);
|
||
756 | } |
||
757 | |||
758 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||
759 | _mm512_cvtph_ps(__m256i __A) |
||
760 | { |
||
761 | return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
|
||
762 | (__v16sf) |
||
763 | _mm512_setzero_ps (), |
||
764 | (__mmask16) -1,
|
||
765 | _MM_FROUND_CUR_DIRECTION); |
||
766 | } |
||
767 | |||
768 | static __inline __m512i __attribute__((__always_inline__, __nodebug__))
|
||
769 | _mm512_cvttps_epi32(__m512 a) |
||
770 | { |
||
771 | return (__m512i)
|
||
772 | __builtin_ia32_cvttps2dq512_mask((__v16sf) a, |
||
773 | (__v16si) _mm512_setzero_si512 (), |
||
774 | (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
|
||
775 | } |
||
776 | |||
777 | static __inline __m256i __attribute__((__always_inline__, __nodebug__))
|
||
778 | _mm512_cvttpd_epi32(__m512d a) |
||
779 | { |
||
780 | return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) a,
|
||
781 | (__v8si)_mm256_setzero_si256(), |
||
782 | (__mmask8) -1,
|
||
783 | _MM_FROUND_CUR_DIRECTION); |
||
784 | } |
||
785 | |||
786 | static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
|
||
787 | _mm512_cvtt_roundpd_epi32(__m512d __A, const int __R) |
||
788 | { |
||
789 | return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
|
||
790 | (__v8si) |
||
791 | _mm256_setzero_si256 (), |
||
792 | (__mmask8) -1,
|
||
793 | __R); |
||
794 | } |
||
795 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
796 | _mm512_cvtt_roundps_epi32(__m512 __A, const int __R) |
||
797 | { |
||
798 | return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
|
||
799 | (__v16si) |
||
800 | _mm512_setzero_si512 (), |
||
801 | (__mmask16) -1,
|
||
802 | __R); |
||
803 | } |
||
804 | |||
805 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
806 | _mm512_cvt_roundps_epi32(__m512 __A, const int __R) |
||
807 | { |
||
808 | return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
|
||
809 | (__v16si) |
||
810 | _mm512_setzero_si512 (), |
||
811 | (__mmask16) -1,
|
||
812 | __R); |
||
813 | } |
||
814 | static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
|
||
815 | _mm512_cvt_roundpd_epi32(__m512d __A, const int __R) |
||
816 | { |
||
817 | return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
|
||
818 | (__v8si) |
||
819 | _mm256_setzero_si256 (), |
||
820 | (__mmask8) -1,
|
||
821 | __R); |
||
822 | } |
||
823 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
824 | _mm512_cvt_roundps_epu32(__m512 __A, const int __R) |
||
825 | { |
||
826 | return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
|
||
827 | (__v16si) |
||
828 | _mm512_setzero_si512 (), |
||
829 | (__mmask16) -1,
|
||
830 | __R); |
||
831 | } |
||
832 | static __inline __m256i __attribute__ ((__always_inline__, __nodebug__))
|
||
833 | _mm512_cvt_roundpd_epu32(__m512d __A, const int __R) |
||
834 | { |
||
835 | return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
|
||
836 | (__v8si) |
||
837 | _mm256_setzero_si256 (), |
||
838 | (__mmask8) -1,
|
||
839 | __R); |
||
840 | } |
||
841 | |||
842 | /* Unpack and Interleave */
|
||
843 | static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
||
844 | _mm512_unpackhi_pd(__m512d __a, __m512d __b) |
||
845 | { |
||
846 | return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); |
||
847 | } |
||
848 | |||
849 | static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
||
850 | _mm512_unpacklo_pd(__m512d __a, __m512d __b) |
||
851 | { |
||
852 | return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); |
||
853 | } |
||
854 | |||
855 | static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
||
856 | _mm512_unpackhi_ps(__m512 __a, __m512 __b) |
||
857 | { |
||
858 | return __builtin_shufflevector(__a, __b,
|
||
859 | 2, 18, 3, 19, |
||
860 | 2+4, 18+4, 3+4, 19+4, |
||
861 | 2+8, 18+8, 3+8, 19+8, |
||
862 | 2+12, 18+12, 3+12, 19+12); |
||
863 | } |
||
864 | |||
865 | static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
||
866 | _mm512_unpacklo_ps(__m512 __a, __m512 __b) |
||
867 | { |
||
868 | return __builtin_shufflevector(__a, __b,
|
||
869 | 0, 16, 1, 17, |
||
870 | 0+4, 16+4, 1+4, 17+4, |
||
871 | 0+8, 16+8, 1+8, 17+8, |
||
872 | 0+12, 16+12, 1+12, 17+12); |
||
873 | } |
||
874 | |||
875 | /* Bit Test */
|
||
876 | |||
877 | static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
|
||
878 | _mm512_test_epi32_mask(__m512i __A, __m512i __B) |
||
879 | { |
||
880 | return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
|
||
881 | (__v16si) __B, |
||
882 | (__mmask16) -1);
|
||
883 | } |
||
884 | |||
885 | static __inline __mmask8 __attribute__ ((__always_inline__, __nodebug__))
|
||
886 | _mm512_test_epi64_mask(__m512i __A, __m512i __B) |
||
887 | { |
||
888 | return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
|
||
889 | (__v8di) __B, |
||
890 | (__mmask8) -1);
|
||
891 | } |
||
892 | |||
893 | /* SIMD load ops */
|
||
894 | |||
895 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
896 | _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) |
||
897 | { |
||
898 | return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P, |
||
899 | (__v16si) |
||
900 | _mm512_setzero_si512 (), |
||
901 | (__mmask16) __U); |
||
902 | } |
||
903 | |||
904 | static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
|
||
905 | _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) |
||
906 | { |
||
907 | return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P, |
||
908 | (__v8di) |
||
909 | _mm512_setzero_si512 (), |
||
910 | (__mmask8) __U); |
||
911 | } |
||
912 | |||
913 | static __inline __m512 __attribute__ ((__always_inline__, __nodebug__))
|
||
914 | _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) |
||
915 | { |
||
916 | return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P, |
||
917 | (__v16sf) |
||
918 | _mm512_setzero_ps (), |
||
919 | (__mmask16) __U); |
||
920 | } |
||
921 | |||
922 | static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
|
||
923 | _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) |
||
924 | { |
||
925 | return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P, |
||
926 | (__v8df) |
||
927 | _mm512_setzero_pd (), |
||
928 | (__mmask8) __U); |
||
929 | } |
||
930 | |||
931 | static __inline __m512d __attribute__((__always_inline__, __nodebug__))
|
||
932 | _mm512_loadu_pd(double const *__p) |
||
933 | { |
||
934 | struct __loadu_pd {
|
||
935 | __m512d __v; |
||
936 | } __attribute__((packed, may_alias)); |
||
937 | return ((struct __loadu_pd*)__p)->__v; |
||
938 | } |
||
939 | |||
940 | static __inline __m512 __attribute__((__always_inline__, __nodebug__))
|
||
941 | _mm512_loadu_ps(float const *__p) |
||
942 | { |
||
943 | struct __loadu_ps {
|
||
944 | __m512 __v; |
||
945 | } __attribute__((packed, may_alias)); |
||
946 | return ((struct __loadu_ps*)__p)->__v; |
||
947 | } |
||
948 | |||
949 | /* SIMD store ops */
|
||
950 | |||
951 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
||
952 | _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
|
||
953 | { |
||
954 | __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A, |
||
955 | (__mmask8) __U); |
||
956 | } |
||
957 | |||
958 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
||
959 | _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
|
||
960 | { |
||
961 | __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A, |
||
962 | (__mmask16) __U); |
||
963 | } |
||
964 | |||
965 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
||
966 | _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
|
||
967 | { |
||
968 | __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); |
||
969 | } |
||
970 | |||
971 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
||
972 | _mm512_storeu_pd(void *__P, __m512d __A)
|
||
973 | { |
||
974 | __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1);
|
||
975 | } |
||
976 | |||
977 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
||
978 | _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
|
||
979 | { |
||
980 | __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A, |
||
981 | (__mmask16) __U); |
||
982 | } |
||
983 | |||
984 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
||
985 | _mm512_storeu_ps(void *__P, __m512 __A)
|
||
986 | { |
||
987 | __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1);
|
||
988 | } |
||
989 | |||
990 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
||
991 | _mm512_store_ps(void *__P, __m512 __A)
|
||
992 | { |
||
993 | *(__m512*)__P = __A; |
||
994 | } |
||
995 | |||
996 | static __inline void __attribute__ ((__always_inline__, __nodebug__)) |
||
997 | _mm512_store_pd(void *__P, __m512d __A)
|
||
998 | { |
||
999 | *(__m512d*)__P = __A; |
||
1000 | } |
||
1001 | |||
1002 | /* Mask ops */
|
||
1003 | |||
1004 | static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__))
|
||
1005 | _mm512_knot(__mmask16 __M) |
||
1006 | { |
||
1007 | return __builtin_ia32_knothi(__M);
|
||
1008 | } |
||
1009 | |||
1010 | /* Integer compare */
|
||
1011 | |||
1012 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
|
||
1013 | _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) { |
||
1014 | return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
|
||
1015 | (__mmask16)-1);
|
||
1016 | } |
||
1017 | |||
1018 | static __inline__ __mmask16 __attribute__((__always_inline__, __nodebug__))
|
||
1019 | _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { |
||
1020 | return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
|
||
1021 | __u); |
||
1022 | } |
||
1023 | |||
1024 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
|
||
1025 | _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { |
||
1026 | return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
|
||
1027 | __u); |
||
1028 | } |
||
1029 | |||
1030 | static __inline__ __mmask8 __attribute__((__always_inline__, __nodebug__))
|
||
1031 | _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { |
||
1032 | return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
|
||
1033 | (__mmask8)-1);
|
||
1034 | } |
||
1035 | |||
1036 | #endif // __AVX512FINTRIN_H |