root / lab4 / .minix-src / include / clang-3.6 / xopintrin.h @ 13
History | View | Annotate | Download (24 KB)
1 |
/*===---- xopintrin.h - XOP intrinsics -------------------------------------===
|
---|---|
2 |
*
|
3 |
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
4 |
* of this software and associated documentation files (the "Software"), to deal
|
5 |
* in the Software without restriction, including without limitation the rights
|
6 |
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7 |
* copies of the Software, and to permit persons to whom the Software is
|
8 |
* furnished to do so, subject to the following conditions:
|
9 |
*
|
10 |
* The above copyright notice and this permission notice shall be included in
|
11 |
* all copies or substantial portions of the Software.
|
12 |
*
|
13 |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14 |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15 |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16 |
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17 |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18 |
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19 |
* THE SOFTWARE.
|
20 |
*
|
21 |
*===-----------------------------------------------------------------------===
|
22 |
*/
|
23 |
|
24 |
#ifndef __X86INTRIN_H
|
25 |
#error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." |
26 |
#endif
|
27 |
|
28 |
#ifndef __XOPINTRIN_H
|
29 |
#define __XOPINTRIN_H
|
30 |
|
31 |
#ifndef __XOP__
|
32 |
# error "XOP instruction set is not enabled" |
33 |
#else
|
34 |
|
35 |
#include <fma4intrin.h> |
36 |
|
37 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
38 |
_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) |
39 |
{ |
40 |
return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
|
41 |
} |
42 |
|
43 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
44 |
_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) |
45 |
{ |
46 |
return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
|
47 |
} |
48 |
|
49 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
50 |
_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) |
51 |
{ |
52 |
return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
|
53 |
} |
54 |
|
55 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
56 |
_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) |
57 |
{ |
58 |
return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
|
59 |
} |
60 |
|
61 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
62 |
_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) |
63 |
{ |
64 |
return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
|
65 |
} |
66 |
|
67 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
68 |
_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) |
69 |
{ |
70 |
return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
|
71 |
} |
72 |
|
73 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
74 |
_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) |
75 |
{ |
76 |
return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
|
77 |
} |
78 |
|
79 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
80 |
_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) |
81 |
{ |
82 |
return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
|
83 |
} |
84 |
|
85 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
86 |
_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) |
87 |
{ |
88 |
return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
|
89 |
} |
90 |
|
91 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
92 |
_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) |
93 |
{ |
94 |
return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
|
95 |
} |
96 |
|
97 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
98 |
_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) |
99 |
{ |
100 |
return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
|
101 |
} |
102 |
|
103 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
104 |
_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) |
105 |
{ |
106 |
return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
|
107 |
} |
108 |
|
109 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
110 |
_mm_haddw_epi8(__m128i __A) |
111 |
{ |
112 |
return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
|
113 |
} |
114 |
|
115 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
116 |
_mm_haddd_epi8(__m128i __A) |
117 |
{ |
118 |
return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
|
119 |
} |
120 |
|
121 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
122 |
_mm_haddq_epi8(__m128i __A) |
123 |
{ |
124 |
return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
|
125 |
} |
126 |
|
127 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
128 |
_mm_haddd_epi16(__m128i __A) |
129 |
{ |
130 |
return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
|
131 |
} |
132 |
|
133 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
134 |
_mm_haddq_epi16(__m128i __A) |
135 |
{ |
136 |
return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
|
137 |
} |
138 |
|
139 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
140 |
_mm_haddq_epi32(__m128i __A) |
141 |
{ |
142 |
return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
|
143 |
} |
144 |
|
145 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
146 |
_mm_haddw_epu8(__m128i __A) |
147 |
{ |
148 |
return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
|
149 |
} |
150 |
|
151 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
152 |
_mm_haddd_epu8(__m128i __A) |
153 |
{ |
154 |
return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
|
155 |
} |
156 |
|
157 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
158 |
_mm_haddq_epu8(__m128i __A) |
159 |
{ |
160 |
return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
|
161 |
} |
162 |
|
163 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
164 |
_mm_haddd_epu16(__m128i __A) |
165 |
{ |
166 |
return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
|
167 |
} |
168 |
|
169 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
170 |
_mm_haddq_epu16(__m128i __A) |
171 |
{ |
172 |
return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
|
173 |
} |
174 |
|
175 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
176 |
_mm_haddq_epu32(__m128i __A) |
177 |
{ |
178 |
return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
|
179 |
} |
180 |
|
181 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
182 |
_mm_hsubw_epi8(__m128i __A) |
183 |
{ |
184 |
return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
|
185 |
} |
186 |
|
187 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
188 |
_mm_hsubd_epi16(__m128i __A) |
189 |
{ |
190 |
return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
|
191 |
} |
192 |
|
193 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
194 |
_mm_hsubq_epi32(__m128i __A) |
195 |
{ |
196 |
return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
|
197 |
} |
198 |
|
199 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
200 |
_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) |
201 |
{ |
202 |
return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C);
|
203 |
} |
204 |
|
205 |
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
|
206 |
_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) |
207 |
{ |
208 |
return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C);
|
209 |
} |
210 |
|
211 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
212 |
_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) |
213 |
{ |
214 |
return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
|
215 |
} |
216 |
|
217 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
218 |
_mm_rot_epi8(__m128i __A, __m128i __B) |
219 |
{ |
220 |
return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
|
221 |
} |
222 |
|
223 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
224 |
_mm_rot_epi16(__m128i __A, __m128i __B) |
225 |
{ |
226 |
return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
|
227 |
} |
228 |
|
229 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
230 |
_mm_rot_epi32(__m128i __A, __m128i __B) |
231 |
{ |
232 |
return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
|
233 |
} |
234 |
|
235 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
236 |
_mm_rot_epi64(__m128i __A, __m128i __B) |
237 |
{ |
238 |
return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
|
239 |
} |
240 |
|
241 |
#define _mm_roti_epi8(A, N) __extension__ ({ \
|
242 |
__m128i __A = (A); \ |
243 |
(__m128i)__builtin_ia32_vprotbi((__v16qi)__A, (N)); }) |
244 |
|
245 |
#define _mm_roti_epi16(A, N) __extension__ ({ \
|
246 |
__m128i __A = (A); \ |
247 |
(__m128i)__builtin_ia32_vprotwi((__v8hi)__A, (N)); }) |
248 |
|
249 |
#define _mm_roti_epi32(A, N) __extension__ ({ \
|
250 |
__m128i __A = (A); \ |
251 |
(__m128i)__builtin_ia32_vprotdi((__v4si)__A, (N)); }) |
252 |
|
253 |
#define _mm_roti_epi64(A, N) __extension__ ({ \
|
254 |
__m128i __A = (A); \ |
255 |
(__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); }) |
256 |
|
257 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
258 |
_mm_shl_epi8(__m128i __A, __m128i __B) |
259 |
{ |
260 |
return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
|
261 |
} |
262 |
|
263 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
264 |
_mm_shl_epi16(__m128i __A, __m128i __B) |
265 |
{ |
266 |
return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
|
267 |
} |
268 |
|
269 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
270 |
_mm_shl_epi32(__m128i __A, __m128i __B) |
271 |
{ |
272 |
return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
|
273 |
} |
274 |
|
275 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
276 |
_mm_shl_epi64(__m128i __A, __m128i __B) |
277 |
{ |
278 |
return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
|
279 |
} |
280 |
|
281 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
282 |
_mm_sha_epi8(__m128i __A, __m128i __B) |
283 |
{ |
284 |
return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
|
285 |
} |
286 |
|
287 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
288 |
_mm_sha_epi16(__m128i __A, __m128i __B) |
289 |
{ |
290 |
return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
|
291 |
} |
292 |
|
293 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
294 |
_mm_sha_epi32(__m128i __A, __m128i __B) |
295 |
{ |
296 |
return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
|
297 |
} |
298 |
|
299 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
300 |
_mm_sha_epi64(__m128i __A, __m128i __B) |
301 |
{ |
302 |
return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
|
303 |
} |
304 |
|
305 |
#define _mm_com_epu8(A, B, N) __extension__ ({ \
|
306 |
__m128i __A = (A); \ |
307 |
__m128i __B = (B); \ |
308 |
(__m128i)__builtin_ia32_vpcomub((__v16qi)__A, (__v16qi)__B, (N)); }) |
309 |
|
310 |
#define _mm_com_epu16(A, B, N) __extension__ ({ \
|
311 |
__m128i __A = (A); \ |
312 |
__m128i __B = (B); \ |
313 |
(__m128i)__builtin_ia32_vpcomuw((__v8hi)__A, (__v8hi)__B, (N)); }) |
314 |
|
315 |
#define _mm_com_epu32(A, B, N) __extension__ ({ \
|
316 |
__m128i __A = (A); \ |
317 |
__m128i __B = (B); \ |
318 |
(__m128i)__builtin_ia32_vpcomud((__v4si)__A, (__v4si)__B, (N)); }) |
319 |
|
320 |
#define _mm_com_epu64(A, B, N) __extension__ ({ \
|
321 |
__m128i __A = (A); \ |
322 |
__m128i __B = (B); \ |
323 |
(__m128i)__builtin_ia32_vpcomuq((__v2di)__A, (__v2di)__B, (N)); }) |
324 |
|
325 |
#define _mm_com_epi8(A, B, N) __extension__ ({ \
|
326 |
__m128i __A = (A); \ |
327 |
__m128i __B = (B); \ |
328 |
(__m128i)__builtin_ia32_vpcomb((__v16qi)__A, (__v16qi)__B, (N)); }) |
329 |
|
330 |
#define _mm_com_epi16(A, B, N) __extension__ ({ \
|
331 |
__m128i __A = (A); \ |
332 |
__m128i __B = (B); \ |
333 |
(__m128i)__builtin_ia32_vpcomw((__v8hi)__A, (__v8hi)__B, (N)); }) |
334 |
|
335 |
#define _mm_com_epi32(A, B, N) __extension__ ({ \
|
336 |
__m128i __A = (A); \ |
337 |
__m128i __B = (B); \ |
338 |
(__m128i)__builtin_ia32_vpcomd((__v4si)__A, (__v4si)__B, (N)); }) |
339 |
|
340 |
#define _mm_com_epi64(A, B, N) __extension__ ({ \
|
341 |
__m128i __A = (A); \ |
342 |
__m128i __B = (B); \ |
343 |
(__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); }) |
344 |
|
345 |
#define _MM_PCOMCTRL_LT 0 |
346 |
#define _MM_PCOMCTRL_LE 1 |
347 |
#define _MM_PCOMCTRL_GT 2 |
348 |
#define _MM_PCOMCTRL_GE 3 |
349 |
#define _MM_PCOMCTRL_EQ 4 |
350 |
#define _MM_PCOMCTRL_NEQ 5 |
351 |
#define _MM_PCOMCTRL_FALSE 6 |
352 |
#define _MM_PCOMCTRL_TRUE 7 |
353 |
|
354 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
355 |
_mm_comlt_epu8(__m128i __A, __m128i __B) |
356 |
{ |
357 |
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
|
358 |
} |
359 |
|
360 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
361 |
_mm_comle_epu8(__m128i __A, __m128i __B) |
362 |
{ |
363 |
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
|
364 |
} |
365 |
|
366 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
367 |
_mm_comgt_epu8(__m128i __A, __m128i __B) |
368 |
{ |
369 |
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
|
370 |
} |
371 |
|
372 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
373 |
_mm_comge_epu8(__m128i __A, __m128i __B) |
374 |
{ |
375 |
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
|
376 |
} |
377 |
|
378 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
379 |
_mm_comeq_epu8(__m128i __A, __m128i __B) |
380 |
{ |
381 |
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
|
382 |
} |
383 |
|
384 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
385 |
_mm_comneq_epu8(__m128i __A, __m128i __B) |
386 |
{ |
387 |
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
|
388 |
} |
389 |
|
390 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
391 |
_mm_comfalse_epu8(__m128i __A, __m128i __B) |
392 |
{ |
393 |
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
|
394 |
} |
395 |
|
396 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
397 |
_mm_comtrue_epu8(__m128i __A, __m128i __B) |
398 |
{ |
399 |
return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
|
400 |
} |
401 |
|
402 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
403 |
_mm_comlt_epu16(__m128i __A, __m128i __B) |
404 |
{ |
405 |
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
|
406 |
} |
407 |
|
408 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
409 |
_mm_comle_epu16(__m128i __A, __m128i __B) |
410 |
{ |
411 |
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
|
412 |
} |
413 |
|
414 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
415 |
_mm_comgt_epu16(__m128i __A, __m128i __B) |
416 |
{ |
417 |
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
|
418 |
} |
419 |
|
420 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
421 |
_mm_comge_epu16(__m128i __A, __m128i __B) |
422 |
{ |
423 |
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
|
424 |
} |
425 |
|
426 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
427 |
_mm_comeq_epu16(__m128i __A, __m128i __B) |
428 |
{ |
429 |
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
|
430 |
} |
431 |
|
432 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
433 |
_mm_comneq_epu16(__m128i __A, __m128i __B) |
434 |
{ |
435 |
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
|
436 |
} |
437 |
|
438 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
439 |
_mm_comfalse_epu16(__m128i __A, __m128i __B) |
440 |
{ |
441 |
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
|
442 |
} |
443 |
|
444 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
445 |
_mm_comtrue_epu16(__m128i __A, __m128i __B) |
446 |
{ |
447 |
return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
|
448 |
} |
449 |
|
450 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
451 |
_mm_comlt_epu32(__m128i __A, __m128i __B) |
452 |
{ |
453 |
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
|
454 |
} |
455 |
|
456 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
457 |
_mm_comle_epu32(__m128i __A, __m128i __B) |
458 |
{ |
459 |
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
|
460 |
} |
461 |
|
462 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
463 |
_mm_comgt_epu32(__m128i __A, __m128i __B) |
464 |
{ |
465 |
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
|
466 |
} |
467 |
|
468 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
469 |
_mm_comge_epu32(__m128i __A, __m128i __B) |
470 |
{ |
471 |
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
|
472 |
} |
473 |
|
474 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
475 |
_mm_comeq_epu32(__m128i __A, __m128i __B) |
476 |
{ |
477 |
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
|
478 |
} |
479 |
|
480 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
481 |
_mm_comneq_epu32(__m128i __A, __m128i __B) |
482 |
{ |
483 |
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
|
484 |
} |
485 |
|
486 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
487 |
_mm_comfalse_epu32(__m128i __A, __m128i __B) |
488 |
{ |
489 |
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
|
490 |
} |
491 |
|
492 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
493 |
_mm_comtrue_epu32(__m128i __A, __m128i __B) |
494 |
{ |
495 |
return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
|
496 |
} |
497 |
|
498 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
499 |
_mm_comlt_epu64(__m128i __A, __m128i __B) |
500 |
{ |
501 |
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
|
502 |
} |
503 |
|
504 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
505 |
_mm_comle_epu64(__m128i __A, __m128i __B) |
506 |
{ |
507 |
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
|
508 |
} |
509 |
|
510 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
511 |
_mm_comgt_epu64(__m128i __A, __m128i __B) |
512 |
{ |
513 |
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
|
514 |
} |
515 |
|
516 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
517 |
_mm_comge_epu64(__m128i __A, __m128i __B) |
518 |
{ |
519 |
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
|
520 |
} |
521 |
|
522 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
523 |
_mm_comeq_epu64(__m128i __A, __m128i __B) |
524 |
{ |
525 |
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
|
526 |
} |
527 |
|
528 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
529 |
_mm_comneq_epu64(__m128i __A, __m128i __B) |
530 |
{ |
531 |
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
|
532 |
} |
533 |
|
534 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
535 |
_mm_comfalse_epu64(__m128i __A, __m128i __B) |
536 |
{ |
537 |
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
|
538 |
} |
539 |
|
540 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
541 |
_mm_comtrue_epu64(__m128i __A, __m128i __B) |
542 |
{ |
543 |
return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
|
544 |
} |
545 |
|
546 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
547 |
_mm_comlt_epi8(__m128i __A, __m128i __B) |
548 |
{ |
549 |
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
|
550 |
} |
551 |
|
552 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
553 |
_mm_comle_epi8(__m128i __A, __m128i __B) |
554 |
{ |
555 |
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
|
556 |
} |
557 |
|
558 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
559 |
_mm_comgt_epi8(__m128i __A, __m128i __B) |
560 |
{ |
561 |
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
|
562 |
} |
563 |
|
564 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
565 |
_mm_comge_epi8(__m128i __A, __m128i __B) |
566 |
{ |
567 |
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
|
568 |
} |
569 |
|
570 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
571 |
_mm_comeq_epi8(__m128i __A, __m128i __B) |
572 |
{ |
573 |
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
|
574 |
} |
575 |
|
576 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
577 |
_mm_comneq_epi8(__m128i __A, __m128i __B) |
578 |
{ |
579 |
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
|
580 |
} |
581 |
|
582 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
583 |
_mm_comfalse_epi8(__m128i __A, __m128i __B) |
584 |
{ |
585 |
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
|
586 |
} |
587 |
|
588 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
589 |
_mm_comtrue_epi8(__m128i __A, __m128i __B) |
590 |
{ |
591 |
return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
|
592 |
} |
593 |
|
594 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
595 |
_mm_comlt_epi16(__m128i __A, __m128i __B) |
596 |
{ |
597 |
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
|
598 |
} |
599 |
|
600 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
601 |
_mm_comle_epi16(__m128i __A, __m128i __B) |
602 |
{ |
603 |
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
|
604 |
} |
605 |
|
606 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
607 |
_mm_comgt_epi16(__m128i __A, __m128i __B) |
608 |
{ |
609 |
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
|
610 |
} |
611 |
|
612 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
613 |
_mm_comge_epi16(__m128i __A, __m128i __B) |
614 |
{ |
615 |
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
|
616 |
} |
617 |
|
618 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
619 |
_mm_comeq_epi16(__m128i __A, __m128i __B) |
620 |
{ |
621 |
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
|
622 |
} |
623 |
|
624 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
625 |
_mm_comneq_epi16(__m128i __A, __m128i __B) |
626 |
{ |
627 |
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
|
628 |
} |
629 |
|
630 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
631 |
_mm_comfalse_epi16(__m128i __A, __m128i __B) |
632 |
{ |
633 |
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
|
634 |
} |
635 |
|
636 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
637 |
_mm_comtrue_epi16(__m128i __A, __m128i __B) |
638 |
{ |
639 |
return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
|
640 |
} |
641 |
|
642 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
643 |
_mm_comlt_epi32(__m128i __A, __m128i __B) |
644 |
{ |
645 |
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
|
646 |
} |
647 |
|
648 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
649 |
_mm_comle_epi32(__m128i __A, __m128i __B) |
650 |
{ |
651 |
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
|
652 |
} |
653 |
|
654 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
655 |
_mm_comgt_epi32(__m128i __A, __m128i __B) |
656 |
{ |
657 |
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
|
658 |
} |
659 |
|
660 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
661 |
_mm_comge_epi32(__m128i __A, __m128i __B) |
662 |
{ |
663 |
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
|
664 |
} |
665 |
|
666 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
667 |
_mm_comeq_epi32(__m128i __A, __m128i __B) |
668 |
{ |
669 |
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
|
670 |
} |
671 |
|
672 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
673 |
_mm_comneq_epi32(__m128i __A, __m128i __B) |
674 |
{ |
675 |
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
|
676 |
} |
677 |
|
678 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
679 |
_mm_comfalse_epi32(__m128i __A, __m128i __B) |
680 |
{ |
681 |
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
|
682 |
} |
683 |
|
684 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
685 |
_mm_comtrue_epi32(__m128i __A, __m128i __B) |
686 |
{ |
687 |
return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
|
688 |
} |
689 |
|
690 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
691 |
_mm_comlt_epi64(__m128i __A, __m128i __B) |
692 |
{ |
693 |
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
|
694 |
} |
695 |
|
696 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
697 |
_mm_comle_epi64(__m128i __A, __m128i __B) |
698 |
{ |
699 |
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
|
700 |
} |
701 |
|
702 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
703 |
_mm_comgt_epi64(__m128i __A, __m128i __B) |
704 |
{ |
705 |
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
|
706 |
} |
707 |
|
708 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
709 |
_mm_comge_epi64(__m128i __A, __m128i __B) |
710 |
{ |
711 |
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
|
712 |
} |
713 |
|
714 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
715 |
_mm_comeq_epi64(__m128i __A, __m128i __B) |
716 |
{ |
717 |
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
|
718 |
} |
719 |
|
720 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
721 |
_mm_comneq_epi64(__m128i __A, __m128i __B) |
722 |
{ |
723 |
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
|
724 |
} |
725 |
|
726 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
727 |
_mm_comfalse_epi64(__m128i __A, __m128i __B) |
728 |
{ |
729 |
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
|
730 |
} |
731 |
|
732 |
static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
|
733 |
_mm_comtrue_epi64(__m128i __A, __m128i __B) |
734 |
{ |
735 |
return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
|
736 |
} |
737 |
|
738 |
#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \
|
739 |
__m128d __X = (X); \ |
740 |
__m128d __Y = (Y); \ |
741 |
__m128i __C = (C); \ |
742 |
(__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y, \ |
743 |
(__v2di)__C, (I)); }) |
744 |
|
745 |
#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \
|
746 |
__m256d __X = (X); \ |
747 |
__m256d __Y = (Y); \ |
748 |
__m256i __C = (C); \ |
749 |
(__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y, \ |
750 |
(__v4di)__C, (I)); }) |
751 |
|
752 |
#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \
|
753 |
__m128 __X = (X); \ |
754 |
__m128 __Y = (Y); \ |
755 |
__m128i __C = (C); \ |
756 |
(__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y, \ |
757 |
(__v4si)__C, (I)); }) |
758 |
|
759 |
#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \
|
760 |
__m256 __X = (X); \ |
761 |
__m256 __Y = (Y); \ |
762 |
__m256i __C = (C); \ |
763 |
(__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \ |
764 |
(__v8si)__C, (I)); }) |
765 |
|
766 |
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
767 |
_mm_frcz_ss(__m128 __A) |
768 |
{ |
769 |
return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
|
770 |
} |
771 |
|
772 |
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
773 |
_mm_frcz_sd(__m128d __A) |
774 |
{ |
775 |
return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
|
776 |
} |
777 |
|
778 |
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
779 |
_mm_frcz_ps(__m128 __A) |
780 |
{ |
781 |
return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
|
782 |
} |
783 |
|
784 |
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
|
785 |
_mm_frcz_pd(__m128d __A) |
786 |
{ |
787 |
return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
|
788 |
} |
789 |
|
790 |
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
|
791 |
_mm256_frcz_ps(__m256 __A) |
792 |
{ |
793 |
return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
|
794 |
} |
795 |
|
796 |
static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
|
797 |
_mm256_frcz_pd(__m256d __A) |
798 |
{ |
799 |
return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
|
800 |
} |
801 |
|
802 |
#endif /* __XOP__ */ |
803 |
|
804 |
#endif /* __XOPINTRIN_H */ |