Project

General

Profile

Statistics
| Revision:

root / lab4 / .minix-src / include / clang-3.6 / fma4intrin.h @ 14

History | View | Annotate | Download (7.44 KB)

1
/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===
2
 *
3
 * Permission is hereby granted, free of charge, to any person obtaining a copy
4
 * of this software and associated documentation files (the "Software"), to deal
5
 * in the Software without restriction, including without limitation the rights
6
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
 * copies of the Software, and to permit persons to whom the Software is
8
 * furnished to do so, subject to the following conditions:
9
 *
10
 * The above copyright notice and this permission notice shall be included in
11
 * all copies or substantial portions of the Software.
12
 *
13
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
 * THE SOFTWARE.
20
 *
21
 *===-----------------------------------------------------------------------===
22
 */
23

    
24
#ifndef __X86INTRIN_H
25
#error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
26
#endif
27

    
28
#ifndef __FMA4INTRIN_H
29
#define __FMA4INTRIN_H
30

    
31
#ifndef __FMA4__
32
# error "FMA4 instruction set is not enabled"
33
#else
34

    
35
#include <pmmintrin.h>
36

    
37
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
38
_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
39
{
40
  return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
41
}
42

    
43
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
44
_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
45
{
46
  return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
47
}
48

    
49
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
50
_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
51
{
52
  return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
53
}
54

    
55
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
56
_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
57
{
58
  return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
59
}
60

    
61
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
62
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
63
{
64
  return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
65
}
66

    
67
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
68
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
69
{
70
  return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
71
}
72

    
73
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
74
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
75
{
76
  return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
77
}
78

    
79
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
80
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
81
{
82
  return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
83
}
84

    
85
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
86
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
87
{
88
  return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
89
}
90

    
91
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
92
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
93
{
94
  return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
95
}
96

    
97
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
98
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
99
{
100
  return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
101
}
102

    
103
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
104
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
105
{
106
  return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
107
}
108

    
109
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
110
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
111
{
112
  return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
113
}
114

    
115
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
116
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
117
{
118
  return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
119
}
120

    
121
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
122
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
123
{
124
  return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
125
}
126

    
127
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
128
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
129
{
130
  return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
131
}
132

    
133
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
134
_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
135
{
136
  return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
137
}
138

    
139
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
140
_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
141
{
142
  return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
143
}
144

    
145
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
146
_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
147
{
148
  return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
149
}
150

    
151
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
152
_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
153
{
154
  return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
155
}
156

    
157
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
158
_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
159
{
160
  return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
161
}
162

    
163
static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
164
_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
165
{
166
  return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
167
}
168

    
169
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
170
_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
171
{
172
  return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
173
}
174

    
175
static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
176
_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
177
{
178
  return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
179
}
180

    
181
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
182
_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
183
{
184
  return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
185
}
186

    
187
static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
188
_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
189
{
190
  return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
191
}
192

    
193
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
194
_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
195
{
196
  return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
197
}
198

    
199
static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
200
_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
201
{
202
  return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
203
}
204

    
205
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
206
_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
207
{
208
  return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
209
}
210

    
211
static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
212
_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
213
{
214
  return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
215
}
216

    
217
static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
218
_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
219
{
220
  return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
221
}
222

    
223
static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
224
_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
225
{
226
  return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
227
}
228

    
229
#endif /* __FMA4__ */
230

    
231
#endif /* __FMA4INTRIN_H */