root / lab4 / .minix-src / include / clang-3.6 / xopintrin.h @ 13
History | View | Annotate | Download (24 KB)
1 | 13 | up20180614 | /*===---- xopintrin.h - XOP intrinsics -------------------------------------===
|
---|---|---|---|
2 | *
|
||
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy
|
||
4 | * of this software and associated documentation files (the "Software"), to deal
|
||
5 | * in the Software without restriction, including without limitation the rights
|
||
6 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||
7 | * copies of the Software, and to permit persons to whom the Software is
|
||
8 | * furnished to do so, subject to the following conditions:
|
||
9 | *
|
||
10 | * The above copyright notice and this permission notice shall be included in
|
||
11 | * all copies or substantial portions of the Software.
|
||
12 | *
|
||
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||
16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||
17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||
18 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||
19 | * THE SOFTWARE.
|
||
20 | *
|
||
21 | *===-----------------------------------------------------------------------===
|
||
22 | */
|
||
23 | |||
24 | #ifndef __X86INTRIN_H
|
||
25 | #error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." |
||
26 | #endif
|
||
27 | |||
28 | #ifndef __XOPINTRIN_H
|
||
29 | #define __XOPINTRIN_H
|
||
30 | |||
31 | #ifndef __XOP__
|
||
32 | # error "XOP instruction set is not enabled" |
||
33 | #else
|
||
34 | |||
35 | #include <fma4intrin.h> |
||
36 | |||
37 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
38 | _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) |
||
39 | { |
||
40 | return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
|
||
41 | } |
||
42 | |||
43 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
44 | _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) |
||
45 | { |
||
46 | return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
|
||
47 | } |
||
48 | |||
49 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
50 | _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) |
||
51 | { |
||
52 | return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
|
||
53 | } |
||
54 | |||
55 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
56 | _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) |
||
57 | { |
||
58 | return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
|
||
59 | } |
||
60 | |||
61 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
62 | _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) |
||
63 | { |
||
64 | return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
|
||
65 | } |
||
66 | |||
67 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
68 | _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) |
||
69 | { |
||
70 | return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
|
||
71 | } |
||
72 | |||
73 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
74 | _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) |
||
75 | { |
||
76 | return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
|
||
77 | } |
||
78 | |||
79 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
80 | _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) |
||
81 | { |
||
82 | return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
|
||
83 | } |
||
84 | |||
85 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
86 | _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) |
||
87 | { |
||
88 | return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
|
||
89 | } |
||
90 | |||
91 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
92 | _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) |
||
93 | { |
||
94 | return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
|
||
95 | } |
||
96 | |||
97 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
98 | _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) |
||
99 | { |
||
100 | return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
|
||
101 | } |
||
102 | |||
103 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
104 | _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) |
||
105 | { |
||
106 | return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
|
||
107 | } |
||
108 | |||
109 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
110 | _mm_haddw_epi8(__m128i __A) |
||
111 | { |
||
112 | return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
|
||
113 | } |
||
114 | |||
115 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
116 | _mm_haddd_epi8(__m128i __A) |
||
117 | { |
||
118 | return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
|
||
119 | } |
||
120 | |||
121 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
122 | _mm_haddq_epi8(__m128i __A) |
||
123 | { |
||
124 | return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
|
||
125 | } |
||
126 | |||
127 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
128 | _mm_haddd_epi16(__m128i __A) |
||
129 | { |
||
130 | return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
|
||
131 | } |
||
132 | |||
133 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
134 | _mm_haddq_epi16(__m128i __A) |
||
135 | { |
||
136 | return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
|
||
137 | } |
||
138 | |||
139 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
140 | _mm_haddq_epi32(__m128i __A) |
||
141 | { |
||
142 | return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
|
||
143 | } |
||
144 | |||
145 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
146 | _mm_haddw_epu8(__m128i __A) |
||
147 | { |
||
148 | return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
|
||
149 | } |
||
150 | |||
151 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
152 | _mm_haddd_epu8(__m128i __A) |
||
153 | { |
||
154 | return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
|
||
155 | } |
||
156 | |||
157 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
158 | _mm_haddq_epu8(__m128i __A) |
||
159 | { |
||
160 | return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
|
||
161 | } |
||
162 | |||
163 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
164 | _mm_haddd_epu16(__m128i __A) |
||
165 | { |
||
166 | return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
|
||
167 | } |
||
168 | |||
169 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
170 | _mm_haddq_epu16(__m128i __A) |
||
171 | { |
||
172 | return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
|
||
173 | } |
||
174 | |||
175 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
176 | _mm_haddq_epu32(__m128i __A) |
||
177 | { |
||
178 | return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
|
||
179 | } |
||
180 | |||
181 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
182 | _mm_hsubw_epi8(__m128i __A) |
||
183 | { |
||
184 | return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
|
||
185 | } |
||
186 | |||
187 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
188 | _mm_hsubd_epi16(__m128i __A) |
||
189 | { |
||
190 | return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
|
||
191 | } |
||
192 | |||
193 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
194 | _mm_hsubq_epi32(__m128i __A) |
||
195 | { |
||
196 | return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
|
||
197 | } |
||
198 | |||
199 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
200 | _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) |
||
201 | { |
||
202 | return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C);
|
||
203 | } |
||
204 | |||
205 | static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
||
206 | _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) |
||
207 | { |
||
208 | return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C);
|
||
209 | } |
||
210 | |||
211 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
212 | _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) |
||
213 | { |
||
214 | return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
|
||
215 | } |
||
216 | |||
217 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
218 | _mm_rot_epi8(__m128i __A, __m128i __B) |
||
219 | { |
||
220 | return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
|
||
221 | } |
||
222 | |||
223 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
224 | _mm_rot_epi16(__m128i __A, __m128i __B) |
||
225 | { |
||
226 | return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
|
||
227 | } |
||
228 | |||
229 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
230 | _mm_rot_epi32(__m128i __A, __m128i __B) |
||
231 | { |
||
232 | return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
|
||
233 | } |
||
234 | |||
235 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
236 | _mm_rot_epi64(__m128i __A, __m128i __B) |
||
237 | { |
||
238 | return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
|
||
239 | } |
||
240 | |||
241 | #define _mm_roti_epi8(A, N) __extension__ ({ \
|
||
242 | __m128i __A = (A); \ |
||
243 | (__m128i)__builtin_ia32_vprotbi((__v16qi)__A, (N)); }) |
||
244 | |||
245 | #define _mm_roti_epi16(A, N) __extension__ ({ \
|
||
246 | __m128i __A = (A); \ |
||
247 | (__m128i)__builtin_ia32_vprotwi((__v8hi)__A, (N)); }) |
||
248 | |||
249 | #define _mm_roti_epi32(A, N) __extension__ ({ \
|
||
250 | __m128i __A = (A); \ |
||
251 | (__m128i)__builtin_ia32_vprotdi((__v4si)__A, (N)); }) |
||
252 | |||
253 | #define _mm_roti_epi64(A, N) __extension__ ({ \
|
||
254 | __m128i __A = (A); \ |
||
255 | (__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); }) |
||
256 | |||
257 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
258 | _mm_shl_epi8(__m128i __A, __m128i __B) |
||
259 | { |
||
260 | return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
|
||
261 | } |
||
262 | |||
263 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
264 | _mm_shl_epi16(__m128i __A, __m128i __B) |
||
265 | { |
||
266 | return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
|
||
267 | } |
||
268 | |||
269 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
270 | _mm_shl_epi32(__m128i __A, __m128i __B) |
||
271 | { |
||
272 | return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
|
||
273 | } |
||
274 | |||
275 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
276 | _mm_shl_epi64(__m128i __A, __m128i __B) |
||
277 | { |
||
278 | return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
|
||
279 | } |
||
280 | |||
281 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
282 | _mm_sha_epi8(__m128i __A, __m128i __B) |
||
283 | { |
||
284 | return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
|
||
285 | } |
||
286 | |||
287 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
288 | _mm_sha_epi16(__m128i __A, __m128i __B) |
||
289 | { |
||
290 | return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
|
||
291 | } |
||
292 | |||
293 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
294 | _mm_sha_epi32(__m128i __A, __m128i __B) |
||
295 | { |
||
296 | return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
|
||
297 | } |
||
298 | |||
299 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
300 | _mm_sha_epi64(__m128i __A, __m128i __B) |
||
301 | { |
||
302 | return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
|
||
303 | } |
||
304 | |||
305 | #define _mm_com_epu8(A, B, N) __extension__ ({ \
|
||
306 | __m128i __A = (A); \ |
||
307 | __m128i __B = (B); \ |
||
308 | (__m128i)__builtin_ia32_vpcomub((__v16qi)__A, (__v16qi)__B, (N)); }) |
||
309 | |||
310 | #define _mm_com_epu16(A, B, N) __extension__ ({ \
|
||
311 | __m128i __A = (A); \ |
||
312 | __m128i __B = (B); \ |
||
313 | (__m128i)__builtin_ia32_vpcomuw((__v8hi)__A, (__v8hi)__B, (N)); }) |
||
314 | |||
315 | #define _mm_com_epu32(A, B, N) __extension__ ({ \
|
||
316 | __m128i __A = (A); \ |
||
317 | __m128i __B = (B); \ |
||
318 | (__m128i)__builtin_ia32_vpcomud((__v4si)__A, (__v4si)__B, (N)); }) |
||
319 | |||
320 | #define _mm_com_epu64(A, B, N) __extension__ ({ \
|
||
321 | __m128i __A = (A); \ |
||
322 | __m128i __B = (B); \ |
||
323 | (__m128i)__builtin_ia32_vpcomuq((__v2di)__A, (__v2di)__B, (N)); }) |
||
324 | |||
325 | #define _mm_com_epi8(A, B, N) __extension__ ({ \
|
||
326 | __m128i __A = (A); \ |
||
327 | __m128i __B = (B); \ |
||
328 | (__m128i)__builtin_ia32_vpcomb((__v16qi)__A, (__v16qi)__B, (N)); }) |
||
329 | |||
330 | #define _mm_com_epi16(A, B, N) __extension__ ({ \
|
||
331 | __m128i __A = (A); \ |
||
332 | __m128i __B = (B); \ |
||
333 | (__m128i)__builtin_ia32_vpcomw((__v8hi)__A, (__v8hi)__B, (N)); }) |
||
334 | |||
335 | #define _mm_com_epi32(A, B, N) __extension__ ({ \
|
||
336 | __m128i __A = (A); \ |
||
337 | __m128i __B = (B); \ |
||
338 | (__m128i)__builtin_ia32_vpcomd((__v4si)__A, (__v4si)__B, (N)); }) |
||
339 | |||
340 | #define _mm_com_epi64(A, B, N) __extension__ ({ \
|
||
341 | __m128i __A = (A); \ |
||
342 | __m128i __B = (B); \ |
||
343 | (__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); }) |
||
344 | |||
345 | #define _MM_PCOMCTRL_LT 0 |
||
346 | #define _MM_PCOMCTRL_LE 1 |
||
347 | #define _MM_PCOMCTRL_GT 2 |
||
348 | #define _MM_PCOMCTRL_GE 3 |
||
349 | #define _MM_PCOMCTRL_EQ 4 |
||
350 | #define _MM_PCOMCTRL_NEQ 5 |
||
351 | #define _MM_PCOMCTRL_FALSE 6 |
||
352 | #define _MM_PCOMCTRL_TRUE 7 |
||
353 | |||
354 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
355 | _mm_comlt_epu8(__m128i __A, __m128i __B) |
||
356 | { |
||
357 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
|
||
358 | } |
||
359 | |||
360 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
361 | _mm_comle_epu8(__m128i __A, __m128i __B) |
||
362 | { |
||
363 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
|
||
364 | } |
||
365 | |||
366 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
367 | _mm_comgt_epu8(__m128i __A, __m128i __B) |
||
368 | { |
||
369 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
|
||
370 | } |
||
371 | |||
372 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
373 | _mm_comge_epu8(__m128i __A, __m128i __B) |
||
374 | { |
||
375 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
|
||
376 | } |
||
377 | |||
378 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
379 | _mm_comeq_epu8(__m128i __A, __m128i __B) |
||
380 | { |
||
381 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
|
||
382 | } |
||
383 | |||
384 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
385 | _mm_comneq_epu8(__m128i __A, __m128i __B) |
||
386 | { |
||
387 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
|
||
388 | } |
||
389 | |||
390 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
391 | _mm_comfalse_epu8(__m128i __A, __m128i __B) |
||
392 | { |
||
393 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
|
||
394 | } |
||
395 | |||
396 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
397 | _mm_comtrue_epu8(__m128i __A, __m128i __B) |
||
398 | { |
||
399 | return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
|
||
400 | } |
||
401 | |||
402 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
403 | _mm_comlt_epu16(__m128i __A, __m128i __B) |
||
404 | { |
||
405 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
|
||
406 | } |
||
407 | |||
408 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
409 | _mm_comle_epu16(__m128i __A, __m128i __B) |
||
410 | { |
||
411 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
|
||
412 | } |
||
413 | |||
414 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
415 | _mm_comgt_epu16(__m128i __A, __m128i __B) |
||
416 | { |
||
417 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
|
||
418 | } |
||
419 | |||
420 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
421 | _mm_comge_epu16(__m128i __A, __m128i __B) |
||
422 | { |
||
423 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
|
||
424 | } |
||
425 | |||
426 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
427 | _mm_comeq_epu16(__m128i __A, __m128i __B) |
||
428 | { |
||
429 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
|
||
430 | } |
||
431 | |||
432 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
433 | _mm_comneq_epu16(__m128i __A, __m128i __B) |
||
434 | { |
||
435 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
|
||
436 | } |
||
437 | |||
438 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
439 | _mm_comfalse_epu16(__m128i __A, __m128i __B) |
||
440 | { |
||
441 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
|
||
442 | } |
||
443 | |||
444 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
445 | _mm_comtrue_epu16(__m128i __A, __m128i __B) |
||
446 | { |
||
447 | return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
|
||
448 | } |
||
449 | |||
450 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
451 | _mm_comlt_epu32(__m128i __A, __m128i __B) |
||
452 | { |
||
453 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
|
||
454 | } |
||
455 | |||
456 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
457 | _mm_comle_epu32(__m128i __A, __m128i __B) |
||
458 | { |
||
459 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
|
||
460 | } |
||
461 | |||
462 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
463 | _mm_comgt_epu32(__m128i __A, __m128i __B) |
||
464 | { |
||
465 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
|
||
466 | } |
||
467 | |||
468 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
469 | _mm_comge_epu32(__m128i __A, __m128i __B) |
||
470 | { |
||
471 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
|
||
472 | } |
||
473 | |||
474 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
475 | _mm_comeq_epu32(__m128i __A, __m128i __B) |
||
476 | { |
||
477 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
|
||
478 | } |
||
479 | |||
480 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
481 | _mm_comneq_epu32(__m128i __A, __m128i __B) |
||
482 | { |
||
483 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
|
||
484 | } |
||
485 | |||
486 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
487 | _mm_comfalse_epu32(__m128i __A, __m128i __B) |
||
488 | { |
||
489 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
|
||
490 | } |
||
491 | |||
492 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
493 | _mm_comtrue_epu32(__m128i __A, __m128i __B) |
||
494 | { |
||
495 | return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
|
||
496 | } |
||
497 | |||
498 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
499 | _mm_comlt_epu64(__m128i __A, __m128i __B) |
||
500 | { |
||
501 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
|
||
502 | } |
||
503 | |||
504 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
505 | _mm_comle_epu64(__m128i __A, __m128i __B) |
||
506 | { |
||
507 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
|
||
508 | } |
||
509 | |||
510 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
511 | _mm_comgt_epu64(__m128i __A, __m128i __B) |
||
512 | { |
||
513 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
|
||
514 | } |
||
515 | |||
516 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
517 | _mm_comge_epu64(__m128i __A, __m128i __B) |
||
518 | { |
||
519 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
|
||
520 | } |
||
521 | |||
522 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
523 | _mm_comeq_epu64(__m128i __A, __m128i __B) |
||
524 | { |
||
525 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
|
||
526 | } |
||
527 | |||
528 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
529 | _mm_comneq_epu64(__m128i __A, __m128i __B) |
||
530 | { |
||
531 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
|
||
532 | } |
||
533 | |||
534 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
535 | _mm_comfalse_epu64(__m128i __A, __m128i __B) |
||
536 | { |
||
537 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
|
||
538 | } |
||
539 | |||
540 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
541 | _mm_comtrue_epu64(__m128i __A, __m128i __B) |
||
542 | { |
||
543 | return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
|
||
544 | } |
||
545 | |||
546 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
547 | _mm_comlt_epi8(__m128i __A, __m128i __B) |
||
548 | { |
||
549 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
|
||
550 | } |
||
551 | |||
552 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
553 | _mm_comle_epi8(__m128i __A, __m128i __B) |
||
554 | { |
||
555 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
|
||
556 | } |
||
557 | |||
558 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
559 | _mm_comgt_epi8(__m128i __A, __m128i __B) |
||
560 | { |
||
561 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
|
||
562 | } |
||
563 | |||
564 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
565 | _mm_comge_epi8(__m128i __A, __m128i __B) |
||
566 | { |
||
567 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
|
||
568 | } |
||
569 | |||
570 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
571 | _mm_comeq_epi8(__m128i __A, __m128i __B) |
||
572 | { |
||
573 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
|
||
574 | } |
||
575 | |||
576 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
577 | _mm_comneq_epi8(__m128i __A, __m128i __B) |
||
578 | { |
||
579 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
|
||
580 | } |
||
581 | |||
582 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
583 | _mm_comfalse_epi8(__m128i __A, __m128i __B) |
||
584 | { |
||
585 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
|
||
586 | } |
||
587 | |||
588 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
589 | _mm_comtrue_epi8(__m128i __A, __m128i __B) |
||
590 | { |
||
591 | return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
|
||
592 | } |
||
593 | |||
594 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
595 | _mm_comlt_epi16(__m128i __A, __m128i __B) |
||
596 | { |
||
597 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
|
||
598 | } |
||
599 | |||
600 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
601 | _mm_comle_epi16(__m128i __A, __m128i __B) |
||
602 | { |
||
603 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
|
||
604 | } |
||
605 | |||
606 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
607 | _mm_comgt_epi16(__m128i __A, __m128i __B) |
||
608 | { |
||
609 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
|
||
610 | } |
||
611 | |||
612 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
613 | _mm_comge_epi16(__m128i __A, __m128i __B) |
||
614 | { |
||
615 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
|
||
616 | } |
||
617 | |||
618 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
619 | _mm_comeq_epi16(__m128i __A, __m128i __B) |
||
620 | { |
||
621 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
|
||
622 | } |
||
623 | |||
624 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
625 | _mm_comneq_epi16(__m128i __A, __m128i __B) |
||
626 | { |
||
627 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
|
||
628 | } |
||
629 | |||
630 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
631 | _mm_comfalse_epi16(__m128i __A, __m128i __B) |
||
632 | { |
||
633 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
|
||
634 | } |
||
635 | |||
636 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
637 | _mm_comtrue_epi16(__m128i __A, __m128i __B) |
||
638 | { |
||
639 | return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
|
||
640 | } |
||
641 | |||
642 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
643 | _mm_comlt_epi32(__m128i __A, __m128i __B) |
||
644 | { |
||
645 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
|
||
646 | } |
||
647 | |||
648 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
649 | _mm_comle_epi32(__m128i __A, __m128i __B) |
||
650 | { |
||
651 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
|
||
652 | } |
||
653 | |||
654 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
655 | _mm_comgt_epi32(__m128i __A, __m128i __B) |
||
656 | { |
||
657 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
|
||
658 | } |
||
659 | |||
660 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
661 | _mm_comge_epi32(__m128i __A, __m128i __B) |
||
662 | { |
||
663 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
|
||
664 | } |
||
665 | |||
666 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
667 | _mm_comeq_epi32(__m128i __A, __m128i __B) |
||
668 | { |
||
669 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
|
||
670 | } |
||
671 | |||
672 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
673 | _mm_comneq_epi32(__m128i __A, __m128i __B) |
||
674 | { |
||
675 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
|
||
676 | } |
||
677 | |||
678 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
679 | _mm_comfalse_epi32(__m128i __A, __m128i __B) |
||
680 | { |
||
681 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
|
||
682 | } |
||
683 | |||
684 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
685 | _mm_comtrue_epi32(__m128i __A, __m128i __B) |
||
686 | { |
||
687 | return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
|
||
688 | } |
||
689 | |||
690 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
691 | _mm_comlt_epi64(__m128i __A, __m128i __B) |
||
692 | { |
||
693 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
|
||
694 | } |
||
695 | |||
696 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
697 | _mm_comle_epi64(__m128i __A, __m128i __B) |
||
698 | { |
||
699 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
|
||
700 | } |
||
701 | |||
702 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
703 | _mm_comgt_epi64(__m128i __A, __m128i __B) |
||
704 | { |
||
705 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
|
||
706 | } |
||
707 | |||
708 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
709 | _mm_comge_epi64(__m128i __A, __m128i __B) |
||
710 | { |
||
711 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
|
||
712 | } |
||
713 | |||
714 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
715 | _mm_comeq_epi64(__m128i __A, __m128i __B) |
||
716 | { |
||
717 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
|
||
718 | } |
||
719 | |||
720 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
721 | _mm_comneq_epi64(__m128i __A, __m128i __B) |
||
722 | { |
||
723 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
|
||
724 | } |
||
725 | |||
726 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
727 | _mm_comfalse_epi64(__m128i __A, __m128i __B) |
||
728 | { |
||
729 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
|
||
730 | } |
||
731 | |||
732 | static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
||
733 | _mm_comtrue_epi64(__m128i __A, __m128i __B) |
||
734 | { |
||
735 | return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
|
||
736 | } |
||
737 | |||
738 | #define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \
|
||
739 | __m128d __X = (X); \ |
||
740 | __m128d __Y = (Y); \ |
||
741 | __m128i __C = (C); \ |
||
742 | (__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y, \ |
||
743 | (__v2di)__C, (I)); }) |
||
744 | |||
745 | #define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \
|
||
746 | __m256d __X = (X); \ |
||
747 | __m256d __Y = (Y); \ |
||
748 | __m256i __C = (C); \ |
||
749 | (__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y, \ |
||
750 | (__v4di)__C, (I)); }) |
||
751 | |||
752 | #define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \
|
||
753 | __m128 __X = (X); \ |
||
754 | __m128 __Y = (Y); \ |
||
755 | __m128i __C = (C); \ |
||
756 | (__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y, \ |
||
757 | (__v4si)__C, (I)); }) |
||
758 | |||
759 | #define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \
|
||
760 | __m256 __X = (X); \ |
||
761 | __m256 __Y = (Y); \ |
||
762 | __m256i __C = (C); \ |
||
763 | (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \ |
||
764 | (__v8si)__C, (I)); }) |
||
765 | |||
766 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
767 | _mm_frcz_ss(__m128 __A) |
||
768 | { |
||
769 | return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
|
||
770 | } |
||
771 | |||
772 | static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||
773 | _mm_frcz_sd(__m128d __A) |
||
774 | { |
||
775 | return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
|
||
776 | } |
||
777 | |||
778 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
779 | _mm_frcz_ps(__m128 __A) |
||
780 | { |
||
781 | return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
|
||
782 | } |
||
783 | |||
784 | static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
||
785 | _mm_frcz_pd(__m128d __A) |
||
786 | { |
||
787 | return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
|
||
788 | } |
||
789 | |||
790 | static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
|
||
791 | _mm256_frcz_ps(__m256 __A) |
||
792 | { |
||
793 | return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
|
||
794 | } |
||
795 | |||
796 | static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
|
||
797 | _mm256_frcz_pd(__m256d __A) |
||
798 | { |
||
799 | return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
|
||
800 | } |
||
801 | |||
802 | #endif /* __XOP__ */ |
||
803 | |||
804 | #endif /* __XOPINTRIN_H */ |