/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23 | |||
24 | #ifndef __XMMINTRIN_H
|
||
25 | #define __XMMINTRIN_H
|
||
26 | |||
27 | #ifndef __SSE__
|
||
28 | #error "SSE instruction set not enabled" |
||
29 | #else
|
||
30 | |||
31 | #include <mmintrin.h> |
||
32 | |||
33 | typedef int __v4si __attribute__((__vector_size__(16))); |
||
34 | typedef float __v4sf __attribute__((__vector_size__(16))); |
||
35 | typedef float __m128 __attribute__((__vector_size__(16))); |
||
36 | |||
37 | /* This header should only be included in a hosted environment as it depends on
|
||
38 | * a standard library to provide allocation routines. */
|
||
39 | #if __STDC_HOSTED__
|
||
40 | #include <mm_malloc.h> |
||
41 | #endif
|
||
42 | |||
43 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
44 | _mm_add_ss(__m128 __a, __m128 __b) |
||
45 | { |
||
46 | __a[0] += __b[0]; |
||
47 | return __a;
|
||
48 | } |
||
49 | |||
50 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
51 | _mm_add_ps(__m128 __a, __m128 __b) |
||
52 | { |
||
53 | return __a + __b;
|
||
54 | } |
||
55 | |||
56 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
57 | _mm_sub_ss(__m128 __a, __m128 __b) |
||
58 | { |
||
59 | __a[0] -= __b[0]; |
||
60 | return __a;
|
||
61 | } |
||
62 | |||
63 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
64 | _mm_sub_ps(__m128 __a, __m128 __b) |
||
65 | { |
||
66 | return __a - __b;
|
||
67 | } |
||
68 | |||
69 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
70 | _mm_mul_ss(__m128 __a, __m128 __b) |
||
71 | { |
||
72 | __a[0] *= __b[0]; |
||
73 | return __a;
|
||
74 | } |
||
75 | |||
76 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
77 | _mm_mul_ps(__m128 __a, __m128 __b) |
||
78 | { |
||
79 | return __a * __b;
|
||
80 | } |
||
81 | |||
82 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
83 | _mm_div_ss(__m128 __a, __m128 __b) |
||
84 | { |
||
85 | __a[0] /= __b[0]; |
||
86 | return __a;
|
||
87 | } |
||
88 | |||
89 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
90 | _mm_div_ps(__m128 __a, __m128 __b) |
||
91 | { |
||
92 | return __a / __b;
|
||
93 | } |
||
94 | |||
95 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
96 | _mm_sqrt_ss(__m128 __a) |
||
97 | { |
||
98 | __m128 __c = __builtin_ia32_sqrtss(__a); |
||
99 | return (__m128) { __c[0], __a[1], __a[2], __a[3] }; |
||
100 | } |
||
101 | |||
102 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
103 | _mm_sqrt_ps(__m128 __a) |
||
104 | { |
||
105 | return __builtin_ia32_sqrtps(__a);
|
||
106 | } |
||
107 | |||
108 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
109 | _mm_rcp_ss(__m128 __a) |
||
110 | { |
||
111 | __m128 __c = __builtin_ia32_rcpss(__a); |
||
112 | return (__m128) { __c[0], __a[1], __a[2], __a[3] }; |
||
113 | } |
||
114 | |||
115 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
116 | _mm_rcp_ps(__m128 __a) |
||
117 | { |
||
118 | return __builtin_ia32_rcpps(__a);
|
||
119 | } |
||
120 | |||
121 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
122 | _mm_rsqrt_ss(__m128 __a) |
||
123 | { |
||
124 | __m128 __c = __builtin_ia32_rsqrtss(__a); |
||
125 | return (__m128) { __c[0], __a[1], __a[2], __a[3] }; |
||
126 | } |
||
127 | |||
128 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
129 | _mm_rsqrt_ps(__m128 __a) |
||
130 | { |
||
131 | return __builtin_ia32_rsqrtps(__a);
|
||
132 | } |
||
133 | |||
134 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
135 | _mm_min_ss(__m128 __a, __m128 __b) |
||
136 | { |
||
137 | return __builtin_ia32_minss(__a, __b);
|
||
138 | } |
||
139 | |||
140 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
141 | _mm_min_ps(__m128 __a, __m128 __b) |
||
142 | { |
||
143 | return __builtin_ia32_minps(__a, __b);
|
||
144 | } |
||
145 | |||
146 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
147 | _mm_max_ss(__m128 __a, __m128 __b) |
||
148 | { |
||
149 | return __builtin_ia32_maxss(__a, __b);
|
||
150 | } |
||
151 | |||
152 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
153 | _mm_max_ps(__m128 __a, __m128 __b) |
||
154 | { |
||
155 | return __builtin_ia32_maxps(__a, __b);
|
||
156 | } |
||
157 | |||
158 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
159 | _mm_and_ps(__m128 __a, __m128 __b) |
||
160 | { |
||
161 | return (__m128)((__v4si)__a & (__v4si)__b);
|
||
162 | } |
||
163 | |||
164 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
165 | _mm_andnot_ps(__m128 __a, __m128 __b) |
||
166 | { |
||
167 | return (__m128)(~(__v4si)__a & (__v4si)__b);
|
||
168 | } |
||
169 | |||
170 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
171 | _mm_or_ps(__m128 __a, __m128 __b) |
||
172 | { |
||
173 | return (__m128)((__v4si)__a | (__v4si)__b);
|
||
174 | } |
||
175 | |||
176 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
177 | _mm_xor_ps(__m128 __a, __m128 __b) |
||
178 | { |
||
179 | return (__m128)((__v4si)__a ^ (__v4si)__b);
|
||
180 | } |
||
181 | |||
182 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
183 | _mm_cmpeq_ss(__m128 __a, __m128 __b) |
||
184 | { |
||
185 | return (__m128)__builtin_ia32_cmpeqss(__a, __b);
|
||
186 | } |
||
187 | |||
188 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
189 | _mm_cmpeq_ps(__m128 __a, __m128 __b) |
||
190 | { |
||
191 | return (__m128)__builtin_ia32_cmpeqps(__a, __b);
|
||
192 | } |
||
193 | |||
194 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
195 | _mm_cmplt_ss(__m128 __a, __m128 __b) |
||
196 | { |
||
197 | return (__m128)__builtin_ia32_cmpltss(__a, __b);
|
||
198 | } |
||
199 | |||
200 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
201 | _mm_cmplt_ps(__m128 __a, __m128 __b) |
||
202 | { |
||
203 | return (__m128)__builtin_ia32_cmpltps(__a, __b);
|
||
204 | } |
||
205 | |||
206 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
207 | _mm_cmple_ss(__m128 __a, __m128 __b) |
||
208 | { |
||
209 | return (__m128)__builtin_ia32_cmpless(__a, __b);
|
||
210 | } |
||
211 | |||
212 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
213 | _mm_cmple_ps(__m128 __a, __m128 __b) |
||
214 | { |
||
215 | return (__m128)__builtin_ia32_cmpleps(__a, __b);
|
||
216 | } |
||
217 | |||
218 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
219 | _mm_cmpgt_ss(__m128 __a, __m128 __b) |
||
220 | { |
||
221 | return (__m128)__builtin_shufflevector(__a,
|
||
222 | __builtin_ia32_cmpltss(__b, __a), |
||
223 | 4, 1, 2, 3); |
||
224 | } |
||
225 | |||
226 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
227 | _mm_cmpgt_ps(__m128 __a, __m128 __b) |
||
228 | { |
||
229 | return (__m128)__builtin_ia32_cmpltps(__b, __a);
|
||
230 | } |
||
231 | |||
232 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
233 | _mm_cmpge_ss(__m128 __a, __m128 __b) |
||
234 | { |
||
235 | return (__m128)__builtin_shufflevector(__a,
|
||
236 | __builtin_ia32_cmpless(__b, __a), |
||
237 | 4, 1, 2, 3); |
||
238 | } |
||
239 | |||
240 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
241 | _mm_cmpge_ps(__m128 __a, __m128 __b) |
||
242 | { |
||
243 | return (__m128)__builtin_ia32_cmpleps(__b, __a);
|
||
244 | } |
||
245 | |||
246 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
247 | _mm_cmpneq_ss(__m128 __a, __m128 __b) |
||
248 | { |
||
249 | return (__m128)__builtin_ia32_cmpneqss(__a, __b);
|
||
250 | } |
||
251 | |||
252 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
253 | _mm_cmpneq_ps(__m128 __a, __m128 __b) |
||
254 | { |
||
255 | return (__m128)__builtin_ia32_cmpneqps(__a, __b);
|
||
256 | } |
||
257 | |||
258 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
259 | _mm_cmpnlt_ss(__m128 __a, __m128 __b) |
||
260 | { |
||
261 | return (__m128)__builtin_ia32_cmpnltss(__a, __b);
|
||
262 | } |
||
263 | |||
264 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
265 | _mm_cmpnlt_ps(__m128 __a, __m128 __b) |
||
266 | { |
||
267 | return (__m128)__builtin_ia32_cmpnltps(__a, __b);
|
||
268 | } |
||
269 | |||
270 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
271 | _mm_cmpnle_ss(__m128 __a, __m128 __b) |
||
272 | { |
||
273 | return (__m128)__builtin_ia32_cmpnless(__a, __b);
|
||
274 | } |
||
275 | |||
276 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
277 | _mm_cmpnle_ps(__m128 __a, __m128 __b) |
||
278 | { |
||
279 | return (__m128)__builtin_ia32_cmpnleps(__a, __b);
|
||
280 | } |
||
281 | |||
282 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
283 | _mm_cmpngt_ss(__m128 __a, __m128 __b) |
||
284 | { |
||
285 | return (__m128)__builtin_shufflevector(__a,
|
||
286 | __builtin_ia32_cmpnltss(__b, __a), |
||
287 | 4, 1, 2, 3); |
||
288 | } |
||
289 | |||
290 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
291 | _mm_cmpngt_ps(__m128 __a, __m128 __b) |
||
292 | { |
||
293 | return (__m128)__builtin_ia32_cmpnltps(__b, __a);
|
||
294 | } |
||
295 | |||
296 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
297 | _mm_cmpnge_ss(__m128 __a, __m128 __b) |
||
298 | { |
||
299 | return (__m128)__builtin_shufflevector(__a,
|
||
300 | __builtin_ia32_cmpnless(__b, __a), |
||
301 | 4, 1, 2, 3); |
||
302 | } |
||
303 | |||
304 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
305 | _mm_cmpnge_ps(__m128 __a, __m128 __b) |
||
306 | { |
||
307 | return (__m128)__builtin_ia32_cmpnleps(__b, __a);
|
||
308 | } |
||
309 | |||
310 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
311 | _mm_cmpord_ss(__m128 __a, __m128 __b) |
||
312 | { |
||
313 | return (__m128)__builtin_ia32_cmpordss(__a, __b);
|
||
314 | } |
||
315 | |||
316 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
317 | _mm_cmpord_ps(__m128 __a, __m128 __b) |
||
318 | { |
||
319 | return (__m128)__builtin_ia32_cmpordps(__a, __b);
|
||
320 | } |
||
321 | |||
322 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
323 | _mm_cmpunord_ss(__m128 __a, __m128 __b) |
||
324 | { |
||
325 | return (__m128)__builtin_ia32_cmpunordss(__a, __b);
|
||
326 | } |
||
327 | |||
328 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
329 | _mm_cmpunord_ps(__m128 __a, __m128 __b) |
||
330 | { |
||
331 | return (__m128)__builtin_ia32_cmpunordps(__a, __b);
|
||
332 | } |
||
333 | |||
334 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
335 | _mm_comieq_ss(__m128 __a, __m128 __b) |
||
336 | { |
||
337 | return __builtin_ia32_comieq(__a, __b);
|
||
338 | } |
||
339 | |||
340 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
341 | _mm_comilt_ss(__m128 __a, __m128 __b) |
||
342 | { |
||
343 | return __builtin_ia32_comilt(__a, __b);
|
||
344 | } |
||
345 | |||
346 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
347 | _mm_comile_ss(__m128 __a, __m128 __b) |
||
348 | { |
||
349 | return __builtin_ia32_comile(__a, __b);
|
||
350 | } |
||
351 | |||
352 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
353 | _mm_comigt_ss(__m128 __a, __m128 __b) |
||
354 | { |
||
355 | return __builtin_ia32_comigt(__a, __b);
|
||
356 | } |
||
357 | |||
358 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
359 | _mm_comige_ss(__m128 __a, __m128 __b) |
||
360 | { |
||
361 | return __builtin_ia32_comige(__a, __b);
|
||
362 | } |
||
363 | |||
364 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
365 | _mm_comineq_ss(__m128 __a, __m128 __b) |
||
366 | { |
||
367 | return __builtin_ia32_comineq(__a, __b);
|
||
368 | } |
||
369 | |||
370 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
371 | _mm_ucomieq_ss(__m128 __a, __m128 __b) |
||
372 | { |
||
373 | return __builtin_ia32_ucomieq(__a, __b);
|
||
374 | } |
||
375 | |||
376 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
377 | _mm_ucomilt_ss(__m128 __a, __m128 __b) |
||
378 | { |
||
379 | return __builtin_ia32_ucomilt(__a, __b);
|
||
380 | } |
||
381 | |||
382 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
383 | _mm_ucomile_ss(__m128 __a, __m128 __b) |
||
384 | { |
||
385 | return __builtin_ia32_ucomile(__a, __b);
|
||
386 | } |
||
387 | |||
388 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
389 | _mm_ucomigt_ss(__m128 __a, __m128 __b) |
||
390 | { |
||
391 | return __builtin_ia32_ucomigt(__a, __b);
|
||
392 | } |
||
393 | |||
394 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
395 | _mm_ucomige_ss(__m128 __a, __m128 __b) |
||
396 | { |
||
397 | return __builtin_ia32_ucomige(__a, __b);
|
||
398 | } |
||
399 | |||
400 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
401 | _mm_ucomineq_ss(__m128 __a, __m128 __b) |
||
402 | { |
||
403 | return __builtin_ia32_ucomineq(__a, __b);
|
||
404 | } |
||
405 | |||
406 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
407 | _mm_cvtss_si32(__m128 __a) |
||
408 | { |
||
409 | return __builtin_ia32_cvtss2si(__a);
|
||
410 | } |
||
411 | |||
412 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
413 | _mm_cvt_ss2si(__m128 __a) |
||
414 | { |
||
415 | return _mm_cvtss_si32(__a);
|
||
416 | } |
||
417 | |||
418 | #ifdef __x86_64__
|
||
419 | |||
420 | static __inline__ long long __attribute__((__always_inline__, __nodebug__)) |
||
421 | _mm_cvtss_si64(__m128 __a) |
||
422 | { |
||
423 | return __builtin_ia32_cvtss2si64(__a);
|
||
424 | } |
||
425 | |||
426 | #endif
|
||
427 | |||
428 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
429 | _mm_cvtps_pi32(__m128 __a) |
||
430 | { |
||
431 | return (__m64)__builtin_ia32_cvtps2pi(__a);
|
||
432 | } |
||
433 | |||
434 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
435 | _mm_cvt_ps2pi(__m128 __a) |
||
436 | { |
||
437 | return _mm_cvtps_pi32(__a);
|
||
438 | } |
||
439 | |||
440 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
441 | _mm_cvttss_si32(__m128 __a) |
||
442 | { |
||
443 | return __a[0]; |
||
444 | } |
||
445 | |||
446 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
447 | _mm_cvtt_ss2si(__m128 __a) |
||
448 | { |
||
449 | return _mm_cvttss_si32(__a);
|
||
450 | } |
||
451 | |||
452 | static __inline__ long long __attribute__((__always_inline__, __nodebug__)) |
||
453 | _mm_cvttss_si64(__m128 __a) |
||
454 | { |
||
455 | return __a[0]; |
||
456 | } |
||
457 | |||
458 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
459 | _mm_cvttps_pi32(__m128 __a) |
||
460 | { |
||
461 | return (__m64)__builtin_ia32_cvttps2pi(__a);
|
||
462 | } |
||
463 | |||
464 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
465 | _mm_cvtt_ps2pi(__m128 __a) |
||
466 | { |
||
467 | return _mm_cvttps_pi32(__a);
|
||
468 | } |
||
469 | |||
470 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
471 | _mm_cvtsi32_ss(__m128 __a, int __b)
|
||
472 | { |
||
473 | __a[0] = __b;
|
||
474 | return __a;
|
||
475 | } |
||
476 | |||
477 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
478 | _mm_cvt_si2ss(__m128 __a, int __b)
|
||
479 | { |
||
480 | return _mm_cvtsi32_ss(__a, __b);
|
||
481 | } |
||
482 | |||
483 | #ifdef __x86_64__
|
||
484 | |||
485 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
486 | _mm_cvtsi64_ss(__m128 __a, long long __b) |
||
487 | { |
||
488 | __a[0] = __b;
|
||
489 | return __a;
|
||
490 | } |
||
491 | |||
492 | #endif
|
||
493 | |||
494 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
495 | _mm_cvtpi32_ps(__m128 __a, __m64 __b) |
||
496 | { |
||
497 | return __builtin_ia32_cvtpi2ps(__a, (__v2si)__b);
|
||
498 | } |
||
499 | |||
500 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
501 | _mm_cvt_pi2ps(__m128 __a, __m64 __b) |
||
502 | { |
||
503 | return _mm_cvtpi32_ps(__a, __b);
|
||
504 | } |
||
505 | |||
506 | static __inline__ float __attribute__((__always_inline__, __nodebug__)) |
||
507 | _mm_cvtss_f32(__m128 __a) |
||
508 | { |
||
509 | return __a[0]; |
||
510 | } |
||
511 | |||
512 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
513 | _mm_loadh_pi(__m128 __a, const __m64 *__p)
|
||
514 | { |
||
515 | typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8))); |
||
516 | struct __mm_loadh_pi_struct {
|
||
517 | __mm_loadh_pi_v2f32 __u; |
||
518 | } __attribute__((__packed__, __may_alias__)); |
||
519 | __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u;
|
||
520 | __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); |
||
521 | return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5); |
||
522 | } |
||
523 | |||
524 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
525 | _mm_loadl_pi(__m128 __a, const __m64 *__p)
|
||
526 | { |
||
527 | typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8))); |
||
528 | struct __mm_loadl_pi_struct {
|
||
529 | __mm_loadl_pi_v2f32 __u; |
||
530 | } __attribute__((__packed__, __may_alias__)); |
||
531 | __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u;
|
||
532 | __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); |
||
533 | return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3); |
||
534 | } |
||
535 | |||
536 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
537 | _mm_load_ss(const float *__p) |
||
538 | { |
||
539 | struct __mm_load_ss_struct {
|
||
540 | float __u;
|
||
541 | } __attribute__((__packed__, __may_alias__)); |
||
542 | float __u = ((struct __mm_load_ss_struct*)__p)->__u; |
||
543 | return (__m128){ __u, 0, 0, 0 }; |
||
544 | } |
||
545 | |||
546 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
547 | _mm_load1_ps(const float *__p) |
||
548 | { |
||
549 | struct __mm_load1_ps_struct {
|
||
550 | float __u;
|
||
551 | } __attribute__((__packed__, __may_alias__)); |
||
552 | float __u = ((struct __mm_load1_ps_struct*)__p)->__u; |
||
553 | return (__m128){ __u, __u, __u, __u };
|
||
554 | } |
||
555 | |||
556 | #define _mm_load_ps1(p) _mm_load1_ps(p)
|
||
557 | |||
558 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
559 | _mm_load_ps(const float *__p) |
||
560 | { |
||
561 | return *(__m128*)__p;
|
||
562 | } |
||
563 | |||
564 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
565 | _mm_loadu_ps(const float *__p) |
||
566 | { |
||
567 | struct __loadu_ps {
|
||
568 | __m128 __v; |
||
569 | } __attribute__((__packed__, __may_alias__)); |
||
570 | return ((struct __loadu_ps*)__p)->__v; |
||
571 | } |
||
572 | |||
573 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
574 | _mm_loadr_ps(const float *__p) |
||
575 | { |
||
576 | __m128 __a = _mm_load_ps(__p); |
||
577 | return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); |
||
578 | } |
||
579 | |||
580 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
581 | _mm_set_ss(float __w)
|
||
582 | { |
||
583 | return (__m128){ __w, 0, 0, 0 }; |
||
584 | } |
||
585 | |||
586 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
587 | _mm_set1_ps(float __w)
|
||
588 | { |
||
589 | return (__m128){ __w, __w, __w, __w };
|
||
590 | } |
||
591 | |||
592 | /* Microsoft specific. */
|
||
593 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
594 | _mm_set_ps1(float __w)
|
||
595 | { |
||
596 | return _mm_set1_ps(__w);
|
||
597 | } |
||
598 | |||
599 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
600 | _mm_set_ps(float __z, float __y, float __x, float __w) |
||
601 | { |
||
602 | return (__m128){ __w, __x, __y, __z };
|
||
603 | } |
||
604 | |||
605 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
606 | _mm_setr_ps(float __z, float __y, float __x, float __w) |
||
607 | { |
||
608 | return (__m128){ __z, __y, __x, __w };
|
||
609 | } |
||
610 | |||
611 | static __inline__ __m128 __attribute__((__always_inline__))
|
||
612 | _mm_setzero_ps(void)
|
||
613 | { |
||
614 | return (__m128){ 0, 0, 0, 0 }; |
||
615 | } |
||
616 | |||
617 | static __inline__ void __attribute__((__always_inline__)) |
||
618 | _mm_storeh_pi(__m64 *__p, __m128 __a) |
||
619 | { |
||
620 | __builtin_ia32_storehps((__v2si *)__p, __a); |
||
621 | } |
||
622 | |||
623 | static __inline__ void __attribute__((__always_inline__)) |
||
624 | _mm_storel_pi(__m64 *__p, __m128 __a) |
||
625 | { |
||
626 | __builtin_ia32_storelps((__v2si *)__p, __a); |
||
627 | } |
||
628 | |||
629 | static __inline__ void __attribute__((__always_inline__)) |
||
630 | _mm_store_ss(float *__p, __m128 __a)
|
||
631 | { |
||
632 | struct __mm_store_ss_struct {
|
||
633 | float __u;
|
||
634 | } __attribute__((__packed__, __may_alias__)); |
||
635 | ((struct __mm_store_ss_struct*)__p)->__u = __a[0]; |
||
636 | } |
||
637 | |||
638 | static __inline__ void __attribute__((__always_inline__, __nodebug__)) |
||
639 | _mm_storeu_ps(float *__p, __m128 __a)
|
||
640 | { |
||
641 | __builtin_ia32_storeups(__p, __a); |
||
642 | } |
||
643 | |||
644 | static __inline__ void __attribute__((__always_inline__, __nodebug__)) |
||
645 | _mm_store1_ps(float *__p, __m128 __a)
|
||
646 | { |
||
647 | __a = __builtin_shufflevector(__a, __a, 0, 0, 0, 0); |
||
648 | _mm_storeu_ps(__p, __a); |
||
649 | } |
||
650 | |||
651 | static __inline__ void __attribute__((__always_inline__, __nodebug__)) |
||
652 | _mm_store_ps1(float *__p, __m128 __a)
|
||
653 | { |
||
654 | return _mm_store1_ps(__p, __a);
|
||
655 | } |
||
656 | |||
657 | static __inline__ void __attribute__((__always_inline__, __nodebug__)) |
||
658 | _mm_store_ps(float *__p, __m128 __a)
|
||
659 | { |
||
660 | *(__m128 *)__p = __a; |
||
661 | } |
||
662 | |||
663 | static __inline__ void __attribute__((__always_inline__, __nodebug__)) |
||
664 | _mm_storer_ps(float *__p, __m128 __a)
|
||
665 | { |
||
666 | __a = __builtin_shufflevector(__a, __a, 3, 2, 1, 0); |
||
667 | _mm_store_ps(__p, __a); |
||
668 | } |
||
669 | |||
/* Prefetch locality hints, from most (T0) to least (NTA) temporal. */
#define _MM_HINT_T0 3
#define _MM_HINT_T1 2
#define _MM_HINT_T2 1
#define _MM_HINT_NTA 0

#ifndef _MSC_VER
/* FIXME: We have to #define this because "sel" must be a constant integer, and
   Sema doesn't do any form of constant propagation yet. */

#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel)))
#endif
|
||
681 | |||
682 | static __inline__ void __attribute__((__always_inline__, __nodebug__)) |
||
683 | _mm_stream_pi(__m64 *__p, __m64 __a) |
||
684 | { |
||
685 | __builtin_ia32_movntq(__p, __a); |
||
686 | } |
||
687 | |||
688 | static __inline__ void __attribute__((__always_inline__, __nodebug__)) |
||
689 | _mm_stream_ps(float *__p, __m128 __a)
|
||
690 | { |
||
691 | __builtin_ia32_movntps(__p, __a); |
||
692 | } |
||
693 | |||
694 | static __inline__ void __attribute__((__always_inline__, __nodebug__)) |
||
695 | _mm_sfence(void)
|
||
696 | { |
||
697 | __builtin_ia32_sfence(); |
||
698 | } |
||
699 | |||
700 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
701 | _mm_extract_pi16(__m64 __a, int __n)
|
||
702 | { |
||
703 | __v4hi __b = (__v4hi)__a; |
||
704 | return (unsigned short)__b[__n & 3]; |
||
705 | } |
||
706 | |||
707 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
708 | _mm_insert_pi16(__m64 __a, int __d, int __n) |
||
709 | { |
||
710 | __v4hi __b = (__v4hi)__a; |
||
711 | __b[__n & 3] = __d;
|
||
712 | return (__m64)__b;
|
||
713 | } |
||
714 | |||
715 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
716 | _mm_max_pi16(__m64 __a, __m64 __b) |
||
717 | { |
||
718 | return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);
|
||
719 | } |
||
720 | |||
721 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
722 | _mm_max_pu8(__m64 __a, __m64 __b) |
||
723 | { |
||
724 | return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);
|
||
725 | } |
||
726 | |||
727 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
728 | _mm_min_pi16(__m64 __a, __m64 __b) |
||
729 | { |
||
730 | return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);
|
||
731 | } |
||
732 | |||
733 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
734 | _mm_min_pu8(__m64 __a, __m64 __b) |
||
735 | { |
||
736 | return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);
|
||
737 | } |
||
738 | |||
739 | static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
||
740 | _mm_movemask_pi8(__m64 __a) |
||
741 | { |
||
742 | return __builtin_ia32_pmovmskb((__v8qi)__a);
|
||
743 | } |
||
744 | |||
745 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
746 | _mm_mulhi_pu16(__m64 __a, __m64 __b) |
||
747 | { |
||
748 | return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);
|
||
749 | } |
||
750 | |||
751 | #define _mm_shuffle_pi16(a, n) __extension__ ({ \
|
||
752 | __m64 __a = (a); \ |
||
753 | (__m64)__builtin_ia32_pshufw((__v4hi)__a, (n)); }) |
||
754 | |||
755 | static __inline__ void __attribute__((__always_inline__, __nodebug__)) |
||
756 | _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
|
||
757 | { |
||
758 | __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p); |
||
759 | } |
||
760 | |||
761 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
762 | _mm_avg_pu8(__m64 __a, __m64 __b) |
||
763 | { |
||
764 | return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);
|
||
765 | } |
||
766 | |||
767 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
768 | _mm_avg_pu16(__m64 __a, __m64 __b) |
||
769 | { |
||
770 | return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);
|
||
771 | } |
||
772 | |||
773 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
774 | _mm_sad_pu8(__m64 __a, __m64 __b) |
||
775 | { |
||
776 | return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);
|
||
777 | } |
||
778 | |||
779 | static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) |
||
780 | _mm_getcsr(void)
|
||
781 | { |
||
782 | return __builtin_ia32_stmxcsr();
|
||
783 | } |
||
784 | |||
785 | static __inline__ void __attribute__((__always_inline__, __nodebug__)) |
||
786 | _mm_setcsr(unsigned int __i) |
||
787 | { |
||
788 | __builtin_ia32_ldmxcsr(__i); |
||
789 | } |
||
790 | |||
791 | #define _mm_shuffle_ps(a, b, mask) __extension__ ({ \
|
||
792 | __m128 __a = (a); \ |
||
793 | __m128 __b = (b); \ |
||
794 | (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__b, \ |
||
795 | (mask) & 0x3, ((mask) & 0xc) >> 2, \ |
||
796 | (((mask) & 0x30) >> 4) + 4, \ |
||
797 | (((mask) & 0xc0) >> 6) + 4); }) |
||
798 | |||
799 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
800 | _mm_unpackhi_ps(__m128 __a, __m128 __b) |
||
801 | { |
||
802 | return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); |
||
803 | } |
||
804 | |||
805 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
806 | _mm_unpacklo_ps(__m128 __a, __m128 __b) |
||
807 | { |
||
808 | return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); |
||
809 | } |
||
810 | |||
811 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
812 | _mm_move_ss(__m128 __a, __m128 __b) |
||
813 | { |
||
814 | return __builtin_shufflevector(__a, __b, 4, 1, 2, 3); |
||
815 | } |
||
816 | |||
817 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
818 | _mm_movehl_ps(__m128 __a, __m128 __b) |
||
819 | { |
||
820 | return __builtin_shufflevector(__a, __b, 6, 7, 2, 3); |
||
821 | } |
||
822 | |||
823 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
824 | _mm_movelh_ps(__m128 __a, __m128 __b) |
||
825 | { |
||
826 | return __builtin_shufflevector(__a, __b, 0, 1, 4, 5); |
||
827 | } |
||
828 | |||
829 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
830 | _mm_cvtpi16_ps(__m64 __a) |
||
831 | { |
||
832 | __m64 __b, __c; |
||
833 | __m128 __r; |
||
834 | |||
835 | __b = _mm_setzero_si64(); |
||
836 | __b = _mm_cmpgt_pi16(__b, __a); |
||
837 | __c = _mm_unpackhi_pi16(__a, __b); |
||
838 | __r = _mm_setzero_ps(); |
||
839 | __r = _mm_cvtpi32_ps(__r, __c); |
||
840 | __r = _mm_movelh_ps(__r, __r); |
||
841 | __c = _mm_unpacklo_pi16(__a, __b); |
||
842 | __r = _mm_cvtpi32_ps(__r, __c); |
||
843 | |||
844 | return __r;
|
||
845 | } |
||
846 | |||
847 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
848 | _mm_cvtpu16_ps(__m64 __a) |
||
849 | { |
||
850 | __m64 __b, __c; |
||
851 | __m128 __r; |
||
852 | |||
853 | __b = _mm_setzero_si64(); |
||
854 | __c = _mm_unpackhi_pi16(__a, __b); |
||
855 | __r = _mm_setzero_ps(); |
||
856 | __r = _mm_cvtpi32_ps(__r, __c); |
||
857 | __r = _mm_movelh_ps(__r, __r); |
||
858 | __c = _mm_unpacklo_pi16(__a, __b); |
||
859 | __r = _mm_cvtpi32_ps(__r, __c); |
||
860 | |||
861 | return __r;
|
||
862 | } |
||
863 | |||
864 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
865 | _mm_cvtpi8_ps(__m64 __a) |
||
866 | { |
||
867 | __m64 __b; |
||
868 | |||
869 | __b = _mm_setzero_si64(); |
||
870 | __b = _mm_cmpgt_pi8(__b, __a); |
||
871 | __b = _mm_unpacklo_pi8(__a, __b); |
||
872 | |||
873 | return _mm_cvtpi16_ps(__b);
|
||
874 | } |
||
875 | |||
876 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
877 | _mm_cvtpu8_ps(__m64 __a) |
||
878 | { |
||
879 | __m64 __b; |
||
880 | |||
881 | __b = _mm_setzero_si64(); |
||
882 | __b = _mm_unpacklo_pi8(__a, __b); |
||
883 | |||
884 | return _mm_cvtpi16_ps(__b);
|
||
885 | } |
||
886 | |||
887 | static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||
888 | _mm_cvtpi32x2_ps(__m64 __a, __m64 __b) |
||
889 | { |
||
890 | __m128 __c; |
||
891 | |||
892 | __c = _mm_setzero_ps(); |
||
893 | __c = _mm_cvtpi32_ps(__c, __b); |
||
894 | __c = _mm_movelh_ps(__c, __c); |
||
895 | |||
896 | return _mm_cvtpi32_ps(__c, __a);
|
||
897 | } |
||
898 | |||
899 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
900 | _mm_cvtps_pi16(__m128 __a) |
||
901 | { |
||
902 | __m64 __b, __c; |
||
903 | |||
904 | __b = _mm_cvtps_pi32(__a); |
||
905 | __a = _mm_movehl_ps(__a, __a); |
||
906 | __c = _mm_cvtps_pi32(__a); |
||
907 | |||
908 | return _mm_packs_pi32(__b, __c);
|
||
909 | } |
||
910 | |||
911 | static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
|
||
912 | _mm_cvtps_pi8(__m128 __a) |
||
913 | { |
||
914 | __m64 __b, __c; |
||
915 | |||
916 | __b = _mm_cvtps_pi16(__a); |
||
917 | __c = _mm_setzero_si64(); |
||
918 | |||
919 | return _mm_packs_pi16(__b, __c);
|
||
920 | } |
||
921 | |||
/* Create a 4-bit mask from the most significant (sign) bit of each of the
   four single-precision elements of __a (MOVMSKPS); bit i of the result
   comes from element i. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_movemask_ps(__m128 __a)
{
  return __builtin_ia32_movmskps(__a);
}
||
927 | |||
/* Build the 8-bit immediate for SHUFPS/_mm_shuffle_ps: each selector is
   2 bits wide; w picks the source element for result lane 0, x for lane 1,
   y for lane 2 and z for lane 3. */
#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))

/* MXCSR exception status flags (sticky bits 0-5 of the control/status
   register, read/written through _mm_getcsr/_mm_setcsr). */
#define _MM_EXCEPT_INVALID (0x0001)
#define _MM_EXCEPT_DENORM (0x0002)
#define _MM_EXCEPT_DIV_ZERO (0x0004)
#define _MM_EXCEPT_OVERFLOW (0x0008)
#define _MM_EXCEPT_UNDERFLOW (0x0010)
#define _MM_EXCEPT_INEXACT (0x0020)
#define _MM_EXCEPT_MASK (0x003f)

/* MXCSR exception mask bits (bits 7-12); a set bit suppresses the
   corresponding exception. */
#define _MM_MASK_INVALID (0x0080)
#define _MM_MASK_DENORM (0x0100)
#define _MM_MASK_DIV_ZERO (0x0200)
#define _MM_MASK_OVERFLOW (0x0400)
#define _MM_MASK_UNDERFLOW (0x0800)
#define _MM_MASK_INEXACT (0x1000)
#define _MM_MASK_MASK (0x1f80)

/* MXCSR rounding-control field (bits 13-14). */
#define _MM_ROUND_NEAREST (0x0000)
#define _MM_ROUND_DOWN (0x2000)
#define _MM_ROUND_UP (0x4000)
#define _MM_ROUND_TOWARD_ZERO (0x6000)
#define _MM_ROUND_MASK (0x6000)

/* MXCSR flush-to-zero bit (bit 15). */
#define _MM_FLUSH_ZERO_MASK (0x8000)
#define _MM_FLUSH_ZERO_ON (0x8000)
#define _MM_FLUSH_ZERO_OFF (0x0000)

/* Read the corresponding MXCSR field. */
#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK)
#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK)
#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)
#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)

/* Replace the corresponding MXCSR field, preserving all other bits. */
#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x)))
#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x)))
#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x)))
#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x)))
||
965 | |||
/* Transpose the 4x4 matrix held in row0..row3 in place: the unpacks
   interleave element pairs of adjacent rows, then movelh/movehl gather
   the interleaved halves into the transposed rows.  Wrapped in
   do { } while (0) so the multi-statement macro behaves as a single
   statement (safe after an unbraced if). */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
do { \
  __m128 tmp3, tmp2, tmp1, tmp0; \
  tmp0 = _mm_unpacklo_ps((row0), (row1)); \
  tmp2 = _mm_unpacklo_ps((row2), (row3)); \
  tmp1 = _mm_unpackhi_ps((row0), (row1)); \
  tmp3 = _mm_unpackhi_ps((row2), (row3)); \
  (row0) = _mm_movelh_ps(tmp0, tmp2); \
  (row1) = _mm_movehl_ps(tmp2, tmp0); \
  (row2) = _mm_movelh_ps(tmp1, tmp3); \
  (row3) = _mm_movehl_ps(tmp3, tmp1); \
} while (0)
||
978 | |||
/* Aliases for compatibility: map the historical MMX-style intrinsic
   mnemonics onto the _mm_-prefixed SSE intrinsic names. */
#define _m_pextrw _mm_extract_pi16
#define _m_pinsrw _mm_insert_pi16
#define _m_pmaxsw _mm_max_pi16
#define _m_pmaxub _mm_max_pu8
#define _m_pminsw _mm_min_pi16
#define _m_pminub _mm_min_pu8
#define _m_pmovmskb _mm_movemask_pi8
#define _m_pmulhuw _mm_mulhi_pu16
#define _m_pshufw _mm_shuffle_pi16
#define _m_maskmovq _mm_maskmove_si64
#define _m_pavgb _mm_avg_pu8
#define _m_pavgw _mm_avg_pu16
#define _m_psadbw _mm_sad_pu8
/* Map the bare token "_m_" to "_mm_" (gcc compatibility).  The original
   header defined this identical macro twice; the redundant duplicate
   (a benign but pointless redefinition) has been removed. */
#define _m_ _mm_
||
995 | |||
996 | /* Ugly hack for backwards-compatibility (compatible with gcc) */
|
||
997 | #ifdef __SSE2__
|
||
998 | #include <emmintrin.h> |
||
999 | #endif
|
||
1000 | |||
1001 | #endif /* __SSE__ */ |
||
1002 | |||
1003 | #endif /* __XMMINTRIN_H */ |