/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512VLDQINTRIN_H
29#define __AVX512VLDQINTRIN_H
30
/* Attribute set attached to every inline function in this header: forced
 * inlining, no debug info, and the "avx512vl,avx512dq" target features. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,avx512dq")))
33
34static __inline__ __m256i __DEFAULT_FN_ATTRS
35_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
36 return (__m256i) ((__v4du) __A * (__v4du) __B);
37}
38
39static __inline__ __m256i __DEFAULT_FN_ATTRS
40_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
41 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
42 (__v4di) __B,
43 (__v4di) __W,
44 (__mmask8) __U);
45}
46
47static __inline__ __m256i __DEFAULT_FN_ATTRS
48_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
49 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
50 (__v4di) __B,
51 (__v4di)
52 _mm256_setzero_si256 (),
53 (__mmask8) __U);
54}
55
56static __inline__ __m128i __DEFAULT_FN_ATTRS
57_mm_mullo_epi64 (__m128i __A, __m128i __B) {
58 return (__m128i) ((__v2du) __A * (__v2du) __B);
59}
60
61static __inline__ __m128i __DEFAULT_FN_ATTRS
62_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
63 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
64 (__v2di) __B,
65 (__v2di) __W,
66 (__mmask8) __U);
67}
68
69static __inline__ __m128i __DEFAULT_FN_ATTRS
70_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
71 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
72 (__v2di) __B,
73 (__v2di)
74 _mm_setzero_si128 (),
75 (__mmask8) __U);
76}
77
78static __inline__ __m256d __DEFAULT_FN_ATTRS
79_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
80 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
81 (__v4df) __B,
82 (__v4df) __W,
83 (__mmask8) __U);
84}
85
86static __inline__ __m256d __DEFAULT_FN_ATTRS
87_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
88 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
89 (__v4df) __B,
90 (__v4df)
91 _mm256_setzero_pd (),
92 (__mmask8) __U);
93}
94
95static __inline__ __m128d __DEFAULT_FN_ATTRS
96_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
97 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
98 (__v2df) __B,
99 (__v2df) __W,
100 (__mmask8) __U);
101}
102
103static __inline__ __m128d __DEFAULT_FN_ATTRS
104_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
105 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
106 (__v2df) __B,
107 (__v2df)
108 _mm_setzero_pd (),
109 (__mmask8) __U);
110}
111
112static __inline__ __m256 __DEFAULT_FN_ATTRS
113_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
114 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
115 (__v8sf) __B,
116 (__v8sf) __W,
117 (__mmask8) __U);
118}
119
120static __inline__ __m256 __DEFAULT_FN_ATTRS
121_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
122 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
123 (__v8sf) __B,
124 (__v8sf)
125 _mm256_setzero_ps (),
126 (__mmask8) __U);
127}
128
129static __inline__ __m128 __DEFAULT_FN_ATTRS
130_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
131 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
132 (__v4sf) __B,
133 (__v4sf) __W,
134 (__mmask8) __U);
135}
136
137static __inline__ __m128 __DEFAULT_FN_ATTRS
138_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
139 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
140 (__v4sf) __B,
141 (__v4sf)
142 _mm_setzero_ps (),
143 (__mmask8) __U);
144}
145
146static __inline__ __m256d __DEFAULT_FN_ATTRS
147_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
148 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
149 (__v4df) __B,
150 (__v4df) __W,
151 (__mmask8) __U);
152}
153
154static __inline__ __m256d __DEFAULT_FN_ATTRS
155_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
156 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
157 (__v4df) __B,
158 (__v4df)
159 _mm256_setzero_pd (),
160 (__mmask8) __U);
161}
162
163static __inline__ __m128d __DEFAULT_FN_ATTRS
164_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
165 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
166 (__v2df) __B,
167 (__v2df) __W,
168 (__mmask8) __U);
169}
170
171static __inline__ __m128d __DEFAULT_FN_ATTRS
172_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
173 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
174 (__v2df) __B,
175 (__v2df)
176 _mm_setzero_pd (),
177 (__mmask8) __U);
178}
179
180static __inline__ __m256 __DEFAULT_FN_ATTRS
181_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
182 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
183 (__v8sf) __B,
184 (__v8sf) __W,
185 (__mmask8) __U);
186}
187
188static __inline__ __m256 __DEFAULT_FN_ATTRS
189_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
190 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
191 (__v8sf) __B,
192 (__v8sf)
193 _mm256_setzero_ps (),
194 (__mmask8) __U);
195}
196
197static __inline__ __m128 __DEFAULT_FN_ATTRS
198_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
199 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
200 (__v4sf) __B,
201 (__v4sf) __W,
202 (__mmask8) __U);
203}
204
205static __inline__ __m128 __DEFAULT_FN_ATTRS
206_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
207 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
208 (__v4sf) __B,
209 (__v4sf)
210 _mm_setzero_ps (),
211 (__mmask8) __U);
212}
213
214static __inline__ __m256d __DEFAULT_FN_ATTRS
215_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
216 __m256d __B) {
217 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
218 (__v4df) __B,
219 (__v4df) __W,
220 (__mmask8) __U);
221}
222
223static __inline__ __m256d __DEFAULT_FN_ATTRS
224_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
225 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
226 (__v4df) __B,
227 (__v4df)
228 _mm256_setzero_pd (),
229 (__mmask8) __U);
230}
231
232static __inline__ __m128d __DEFAULT_FN_ATTRS
233_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
234 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
235 (__v2df) __B,
236 (__v2df) __W,
237 (__mmask8) __U);
238}
239
240static __inline__ __m128d __DEFAULT_FN_ATTRS
241_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
242 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
243 (__v2df) __B,
244 (__v2df)
245 _mm_setzero_pd (),
246 (__mmask8) __U);
247}
248
249static __inline__ __m256 __DEFAULT_FN_ATTRS
250_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
251 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
252 (__v8sf) __B,
253 (__v8sf) __W,
254 (__mmask8) __U);
255}
256
257static __inline__ __m256 __DEFAULT_FN_ATTRS
258_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
259 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
260 (__v8sf) __B,
261 (__v8sf)
262 _mm256_setzero_ps (),
263 (__mmask8) __U);
264}
265
266static __inline__ __m128 __DEFAULT_FN_ATTRS
267_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
268 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
269 (__v4sf) __B,
270 (__v4sf) __W,
271 (__mmask8) __U);
272}
273
274static __inline__ __m128 __DEFAULT_FN_ATTRS
275_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
276 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
277 (__v4sf) __B,
278 (__v4sf)
279 _mm_setzero_ps (),
280 (__mmask8) __U);
281}
282
283static __inline__ __m256d __DEFAULT_FN_ATTRS
284_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
285 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
286 (__v4df) __B,
287 (__v4df) __W,
288 (__mmask8) __U);
289}
290
291static __inline__ __m256d __DEFAULT_FN_ATTRS
292_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
293 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
294 (__v4df) __B,
295 (__v4df)
296 _mm256_setzero_pd (),
297 (__mmask8) __U);
298}
299
300static __inline__ __m128d __DEFAULT_FN_ATTRS
301_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
302 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
303 (__v2df) __B,
304 (__v2df) __W,
305 (__mmask8) __U);
306}
307
308static __inline__ __m128d __DEFAULT_FN_ATTRS
309_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
310 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
311 (__v2df) __B,
312 (__v2df)
313 _mm_setzero_pd (),
314 (__mmask8) __U);
315}
316
317static __inline__ __m256 __DEFAULT_FN_ATTRS
318_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
319 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
320 (__v8sf) __B,
321 (__v8sf) __W,
322 (__mmask8) __U);
323}
324
325static __inline__ __m256 __DEFAULT_FN_ATTRS
326_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
327 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
328 (__v8sf) __B,
329 (__v8sf)
330 _mm256_setzero_ps (),
331 (__mmask8) __U);
332}
333
334static __inline__ __m128 __DEFAULT_FN_ATTRS
335_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
336 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
337 (__v4sf) __B,
338 (__v4sf) __W,
339 (__mmask8) __U);
340}
341
342static __inline__ __m128 __DEFAULT_FN_ATTRS
343_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
344 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
345 (__v4sf) __B,
346 (__v4sf)
347 _mm_setzero_ps (),
348 (__mmask8) __U);
349}
350
351static __inline__ __m128i __DEFAULT_FN_ATTRS
352_mm_cvtpd_epi64 (__m128d __A) {
353 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
354 (__v2di) _mm_setzero_si128(),
355 (__mmask8) -1);
356}
357
358static __inline__ __m128i __DEFAULT_FN_ATTRS
359_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
360 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
361 (__v2di) __W,
362 (__mmask8) __U);
363}
364
365static __inline__ __m128i __DEFAULT_FN_ATTRS
366_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
367 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
368 (__v2di) _mm_setzero_si128(),
369 (__mmask8) __U);
370}
371
372static __inline__ __m256i __DEFAULT_FN_ATTRS
373_mm256_cvtpd_epi64 (__m256d __A) {
374 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
375 (__v4di) _mm256_setzero_si256(),
376 (__mmask8) -1);
377}
378
379static __inline__ __m256i __DEFAULT_FN_ATTRS
380_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
381 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
382 (__v4di) __W,
383 (__mmask8) __U);
384}
385
386static __inline__ __m256i __DEFAULT_FN_ATTRS
387_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
388 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
389 (__v4di) _mm256_setzero_si256(),
390 (__mmask8) __U);
391}
392
393static __inline__ __m128i __DEFAULT_FN_ATTRS
394_mm_cvtpd_epu64 (__m128d __A) {
395 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
396 (__v2di) _mm_setzero_si128(),
397 (__mmask8) -1);
398}
399
400static __inline__ __m128i __DEFAULT_FN_ATTRS
401_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
402 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
403 (__v2di) __W,
404 (__mmask8) __U);
405}
406
407static __inline__ __m128i __DEFAULT_FN_ATTRS
408_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
409 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
410 (__v2di) _mm_setzero_si128(),
411 (__mmask8) __U);
412}
413
414static __inline__ __m256i __DEFAULT_FN_ATTRS
415_mm256_cvtpd_epu64 (__m256d __A) {
416 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
417 (__v4di) _mm256_setzero_si256(),
418 (__mmask8) -1);
419}
420
421static __inline__ __m256i __DEFAULT_FN_ATTRS
422_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
423 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
424 (__v4di) __W,
425 (__mmask8) __U);
426}
427
428static __inline__ __m256i __DEFAULT_FN_ATTRS
429_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
430 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
431 (__v4di) _mm256_setzero_si256(),
432 (__mmask8) __U);
433}
434
435static __inline__ __m128i __DEFAULT_FN_ATTRS
436_mm_cvtps_epi64 (__m128 __A) {
437 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
438 (__v2di) _mm_setzero_si128(),
439 (__mmask8) -1);
440}
441
442static __inline__ __m128i __DEFAULT_FN_ATTRS
443_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
444 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
445 (__v2di) __W,
446 (__mmask8) __U);
447}
448
449static __inline__ __m128i __DEFAULT_FN_ATTRS
450_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
451 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
452 (__v2di) _mm_setzero_si128(),
453 (__mmask8) __U);
454}
455
456static __inline__ __m256i __DEFAULT_FN_ATTRS
457_mm256_cvtps_epi64 (__m128 __A) {
458 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
459 (__v4di) _mm256_setzero_si256(),
460 (__mmask8) -1);
461}
462
463static __inline__ __m256i __DEFAULT_FN_ATTRS
464_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
465 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
466 (__v4di) __W,
467 (__mmask8) __U);
468}
469
470static __inline__ __m256i __DEFAULT_FN_ATTRS
471_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
472 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
473 (__v4di) _mm256_setzero_si256(),
474 (__mmask8) __U);
475}
476
477static __inline__ __m128i __DEFAULT_FN_ATTRS
478_mm_cvtps_epu64 (__m128 __A) {
479 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
480 (__v2di) _mm_setzero_si128(),
481 (__mmask8) -1);
482}
483
484static __inline__ __m128i __DEFAULT_FN_ATTRS
485_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
486 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
487 (__v2di) __W,
488 (__mmask8) __U);
489}
490
491static __inline__ __m128i __DEFAULT_FN_ATTRS
492_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
493 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
494 (__v2di) _mm_setzero_si128(),
495 (__mmask8) __U);
496}
497
498static __inline__ __m256i __DEFAULT_FN_ATTRS
499_mm256_cvtps_epu64 (__m128 __A) {
500 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
501 (__v4di) _mm256_setzero_si256(),
502 (__mmask8) -1);
503}
504
505static __inline__ __m256i __DEFAULT_FN_ATTRS
506_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
507 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
508 (__v4di) __W,
509 (__mmask8) __U);
510}
511
512static __inline__ __m256i __DEFAULT_FN_ATTRS
513_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
514 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
515 (__v4di) _mm256_setzero_si256(),
516 (__mmask8) __U);
517}
518
519static __inline__ __m128d __DEFAULT_FN_ATTRS
520_mm_cvtepi64_pd (__m128i __A) {
521 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
522 (__v2df) _mm_setzero_pd(),
523 (__mmask8) -1);
524}
525
526static __inline__ __m128d __DEFAULT_FN_ATTRS
527_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
528 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
529 (__v2df) __W,
530 (__mmask8) __U);
531}
532
533static __inline__ __m128d __DEFAULT_FN_ATTRS
534_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
535 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
536 (__v2df) _mm_setzero_pd(),
537 (__mmask8) __U);
538}
539
540static __inline__ __m256d __DEFAULT_FN_ATTRS
541_mm256_cvtepi64_pd (__m256i __A) {
542 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
543 (__v4df) _mm256_setzero_pd(),
544 (__mmask8) -1);
545}
546
547static __inline__ __m256d __DEFAULT_FN_ATTRS
548_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
549 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
550 (__v4df) __W,
551 (__mmask8) __U);
552}
553
554static __inline__ __m256d __DEFAULT_FN_ATTRS
555_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
556 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
557 (__v4df) _mm256_setzero_pd(),
558 (__mmask8) __U);
559}
560
561static __inline__ __m128 __DEFAULT_FN_ATTRS
562_mm_cvtepi64_ps (__m128i __A) {
563 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
564 (__v4sf) _mm_setzero_ps(),
565 (__mmask8) -1);
566}
567
568static __inline__ __m128 __DEFAULT_FN_ATTRS
569_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
570 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
571 (__v4sf) __W,
572 (__mmask8) __U);
573}
574
575static __inline__ __m128 __DEFAULT_FN_ATTRS
576_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
577 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
578 (__v4sf) _mm_setzero_ps(),
579 (__mmask8) __U);
580}
581
582static __inline__ __m128 __DEFAULT_FN_ATTRS
583_mm256_cvtepi64_ps (__m256i __A) {
584 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
585 (__v4sf) _mm_setzero_ps(),
586 (__mmask8) -1);
587}
588
589static __inline__ __m128 __DEFAULT_FN_ATTRS
590_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
591 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
592 (__v4sf) __W,
593 (__mmask8) __U);
594}
595
596static __inline__ __m128 __DEFAULT_FN_ATTRS
597_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
598 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
599 (__v4sf) _mm_setzero_ps(),
600 (__mmask8) __U);
601}
602
603static __inline__ __m128i __DEFAULT_FN_ATTRS
604_mm_cvttpd_epi64 (__m128d __A) {
605 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
606 (__v2di) _mm_setzero_si128(),
607 (__mmask8) -1);
608}
609
610static __inline__ __m128i __DEFAULT_FN_ATTRS
611_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
612 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
613 (__v2di) __W,
614 (__mmask8) __U);
615}
616
617static __inline__ __m128i __DEFAULT_FN_ATTRS
618_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
619 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
620 (__v2di) _mm_setzero_si128(),
621 (__mmask8) __U);
622}
623
624static __inline__ __m256i __DEFAULT_FN_ATTRS
625_mm256_cvttpd_epi64 (__m256d __A) {
626 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
627 (__v4di) _mm256_setzero_si256(),
628 (__mmask8) -1);
629}
630
631static __inline__ __m256i __DEFAULT_FN_ATTRS
632_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
633 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
634 (__v4di) __W,
635 (__mmask8) __U);
636}
637
638static __inline__ __m256i __DEFAULT_FN_ATTRS
639_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
640 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
641 (__v4di) _mm256_setzero_si256(),
642 (__mmask8) __U);
643}
644
645static __inline__ __m128i __DEFAULT_FN_ATTRS
646_mm_cvttpd_epu64 (__m128d __A) {
647 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
648 (__v2di) _mm_setzero_si128(),
649 (__mmask8) -1);
650}
651
652static __inline__ __m128i __DEFAULT_FN_ATTRS
653_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
654 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
655 (__v2di) __W,
656 (__mmask8) __U);
657}
658
659static __inline__ __m128i __DEFAULT_FN_ATTRS
660_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
661 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
662 (__v2di) _mm_setzero_si128(),
663 (__mmask8) __U);
664}
665
666static __inline__ __m256i __DEFAULT_FN_ATTRS
667_mm256_cvttpd_epu64 (__m256d __A) {
668 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
669 (__v4di) _mm256_setzero_si256(),
670 (__mmask8) -1);
671}
672
673static __inline__ __m256i __DEFAULT_FN_ATTRS
674_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
675 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
676 (__v4di) __W,
677 (__mmask8) __U);
678}
679
680static __inline__ __m256i __DEFAULT_FN_ATTRS
681_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
682 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
683 (__v4di) _mm256_setzero_si256(),
684 (__mmask8) __U);
685}
686
687static __inline__ __m128i __DEFAULT_FN_ATTRS
688_mm_cvttps_epi64 (__m128 __A) {
689 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
690 (__v2di) _mm_setzero_si128(),
691 (__mmask8) -1);
692}
693
694static __inline__ __m128i __DEFAULT_FN_ATTRS
695_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
696 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
697 (__v2di) __W,
698 (__mmask8) __U);
699}
700
701static __inline__ __m128i __DEFAULT_FN_ATTRS
702_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
703 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
704 (__v2di) _mm_setzero_si128(),
705 (__mmask8) __U);
706}
707
708static __inline__ __m256i __DEFAULT_FN_ATTRS
709_mm256_cvttps_epi64 (__m128 __A) {
710 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
711 (__v4di) _mm256_setzero_si256(),
712 (__mmask8) -1);
713}
714
715static __inline__ __m256i __DEFAULT_FN_ATTRS
716_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
717 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
718 (__v4di) __W,
719 (__mmask8) __U);
720}
721
722static __inline__ __m256i __DEFAULT_FN_ATTRS
723_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
724 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
725 (__v4di) _mm256_setzero_si256(),
726 (__mmask8) __U);
727}
728
729static __inline__ __m128i __DEFAULT_FN_ATTRS
730_mm_cvttps_epu64 (__m128 __A) {
731 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
732 (__v2di) _mm_setzero_si128(),
733 (__mmask8) -1);
734}
735
736static __inline__ __m128i __DEFAULT_FN_ATTRS
737_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
738 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
739 (__v2di) __W,
740 (__mmask8) __U);
741}
742
743static __inline__ __m128i __DEFAULT_FN_ATTRS
744_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
745 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
746 (__v2di) _mm_setzero_si128(),
747 (__mmask8) __U);
748}
749
750static __inline__ __m256i __DEFAULT_FN_ATTRS
751_mm256_cvttps_epu64 (__m128 __A) {
752 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
753 (__v4di) _mm256_setzero_si256(),
754 (__mmask8) -1);
755}
756
757static __inline__ __m256i __DEFAULT_FN_ATTRS
758_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
759 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
760 (__v4di) __W,
761 (__mmask8) __U);
762}
763
764static __inline__ __m256i __DEFAULT_FN_ATTRS
765_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
766 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
767 (__v4di) _mm256_setzero_si256(),
768 (__mmask8) __U);
769}
770
771static __inline__ __m128d __DEFAULT_FN_ATTRS
772_mm_cvtepu64_pd (__m128i __A) {
773 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
774 (__v2df) _mm_setzero_pd(),
775 (__mmask8) -1);
776}
777
778static __inline__ __m128d __DEFAULT_FN_ATTRS
779_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
780 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
781 (__v2df) __W,
782 (__mmask8) __U);
783}
784
785static __inline__ __m128d __DEFAULT_FN_ATTRS
786_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
787 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
788 (__v2df) _mm_setzero_pd(),
789 (__mmask8) __U);
790}
791
792static __inline__ __m256d __DEFAULT_FN_ATTRS
793_mm256_cvtepu64_pd (__m256i __A) {
794 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
795 (__v4df) _mm256_setzero_pd(),
796 (__mmask8) -1);
797}
798
799static __inline__ __m256d __DEFAULT_FN_ATTRS
800_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
801 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
802 (__v4df) __W,
803 (__mmask8) __U);
804}
805
806static __inline__ __m256d __DEFAULT_FN_ATTRS
807_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
808 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
809 (__v4df) _mm256_setzero_pd(),
810 (__mmask8) __U);
811}
812
813static __inline__ __m128 __DEFAULT_FN_ATTRS
814_mm_cvtepu64_ps (__m128i __A) {
815 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
816 (__v4sf) _mm_setzero_ps(),
817 (__mmask8) -1);
818}
819
820static __inline__ __m128 __DEFAULT_FN_ATTRS
821_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
822 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
823 (__v4sf) __W,
824 (__mmask8) __U);
825}
826
827static __inline__ __m128 __DEFAULT_FN_ATTRS
828_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
829 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
830 (__v4sf) _mm_setzero_ps(),
831 (__mmask8) __U);
832}
833
834static __inline__ __m128 __DEFAULT_FN_ATTRS
835_mm256_cvtepu64_ps (__m256i __A) {
836 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
837 (__v4sf) _mm_setzero_ps(),
838 (__mmask8) -1);
839}
840
841static __inline__ __m128 __DEFAULT_FN_ATTRS
842_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
843 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
844 (__v4sf) __W,
845 (__mmask8) __U);
846}
847
848static __inline__ __m128 __DEFAULT_FN_ATTRS
849_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
850 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
851 (__v4sf) _mm_setzero_ps(),
852 (__mmask8) __U);
853}
854
/* Expands to __builtin_ia32_rangepd128_mask(A, B, (int)C, zero vector,
 * all-ones mask), cast to __m128d.
 * NOTE(review): kept as a macro, presumably because C must be an immediate
 * -- confirm. */
#define _mm_range_pd(A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1))
860
/* Expands to __builtin_ia32_rangepd128_mask(A, B, (int)C, W, U), cast to
 * __m128d.
 * NOTE(review): presumably W supplies lanes whose bit in U is clear, and C
 * must be an immediate -- confirm. */
#define _mm_mask_range_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (int)(C), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U)))
866
/* Expands to __builtin_ia32_rangepd128_mask(A, B, (int)C, zero vector, U),
 * cast to __m128d.
 * NOTE(review): kept as a macro, presumably because C must be an immediate
 * -- confirm. */
#define _mm_maskz_range_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U)))
872
/* Expands to __builtin_ia32_rangepd256_mask(A, B, (int)C, zero vector,
 * all-ones mask), cast to __m256d.
 * NOTE(review): kept as a macro, presumably because C must be an immediate
 * -- confirm. */
#define _mm256_range_pd(A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), \
                                           (int)(C), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)-1))
878
/* Expands to __builtin_ia32_rangepd256_mask(A, B, (int)C, W, U), cast to
 * __m256d.
 * NOTE(review): presumably W supplies lanes whose bit in U is clear, and C
 * must be an immediate -- confirm. */
#define _mm256_mask_range_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), \
                                           (int)(C), \
                                           (__v4df)(__m256d)(W), \
                                           (__mmask8)(U)))
884
/* Expands to __builtin_ia32_rangepd256_mask(A, B, (int)C, zero vector, U),
 * cast to __m256d.
 * NOTE(review): kept as a macro, presumably because C must be an immediate
 * -- confirm. */
#define _mm256_maskz_range_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), \
                                           (int)(C), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)(U)))
890
/* Expands to __builtin_ia32_rangeps128_mask(A, B, (int)C, zero vector,
 * all-ones mask), cast to __m128.
 * NOTE(review): kept as a macro, presumably because C must be an immediate
 * -- confirm. */
#define _mm_range_ps(A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (int)(C), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1))
896
/* Expands to __builtin_ia32_rangeps128_mask(A, B, (int)C, W, U), cast to
 * __m128.
 * NOTE(review): presumably W supplies lanes whose bit in U is clear, and C
 * must be an immediate -- confirm. */
#define _mm_mask_range_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (int)(C), \
                                          (__v4sf)(__m128)(W), \
                                          (__mmask8)(U)))
901
/* Expands to __builtin_ia32_rangeps128_mask(A, B, (int)C, zero vector, U),
 * cast to __m128.
 * NOTE(review): kept as a macro, presumably because C must be an immediate
 * -- confirm. */
#define _mm_maskz_range_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (int)(C), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U)))
907
/* Expands to __builtin_ia32_rangeps256_mask(A, B, (int)C, zero vector,
 * all-ones mask), cast to __m256.
 * NOTE(review): kept as a macro, presumably because C must be an immediate
 * -- confirm. */
#define _mm256_range_ps(A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), \
                                          (int)(C), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1))
913
/* Expands to __builtin_ia32_rangeps256_mask(A, B, (int)C, W, U), cast to
 * __m256.
 * NOTE(review): presumably W supplies lanes whose bit in U is clear, and C
 * must be an immediate -- confirm. */
#define _mm256_mask_range_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), \
                                          (int)(C), \
                                          (__v8sf)(__m256)(W), \
                                          (__mmask8)(U)))
918
/* Expands to __builtin_ia32_rangeps256_mask(A, B, (int)C, zero vector, U),
 * cast to __m256.
 * NOTE(review): kept as a macro, presumably because C must be an immediate
 * -- confirm. */
#define _mm256_maskz_range_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), \
                                          (int)(C), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U)))
924
/* Expands to __builtin_ia32_reducepd128_mask(A, (int)B, zero vector,
 * all-ones mask), cast to __m128d.
 * NOTE(review): kept as a macro, presumably because B must be an immediate
 * -- confirm. */
#define _mm_reduce_pd(A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), \
                                            (int)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1))
929
/* Expands to __builtin_ia32_reducepd128_mask(A, (int)B, W, U), cast to
 * __m128d.
 * NOTE(review): presumably W supplies lanes whose bit in U is clear, and B
 * must be an immediate -- confirm. */
#define _mm_mask_reduce_pd(W, U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), \
                                            (int)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U)))
934
/* Expands to __builtin_ia32_reducepd128_mask(A, (int)B, zero vector, U),
 * cast to __m128d.
 * NOTE(review): kept as a macro, presumably because B must be an immediate
 * -- confirm. */
#define _mm_maskz_reduce_pd(U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), \
                                            (int)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U)))
939
/* Expands to __builtin_ia32_reducepd256_mask(A, (int)B, zero vector,
 * all-ones mask), cast to __m256d.
 * NOTE(review): kept as a macro, presumably because B must be an immediate
 * -- confirm. */
#define _mm256_reduce_pd(A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), \
                                            (int)(B), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1))
944
/* Expands to __builtin_ia32_reducepd256_mask(A, (int)B, W, U), cast to
 * __m256d.
 * NOTE(review): presumably W supplies lanes whose bit in U is clear, and B
 * must be an immediate -- confirm. */
#define _mm256_mask_reduce_pd(W, U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), \
                                            (int)(B), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U)))
949
/* Expands to __builtin_ia32_reducepd256_mask(A, (int)B, zero vector, U),
 * cast to __m256d.
 * NOTE(review): kept as a macro, presumably because B must be an immediate
 * -- confirm. */
#define _mm256_maskz_reduce_pd(U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), \
                                            (int)(B), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U)))
954
/* Expands to __builtin_ia32_reduceps128_mask(A, (int)B, zero vector,
 * all-ones mask), cast to __m128.
 * NOTE(review): kept as a macro, presumably because B must be an immediate
 * -- confirm. */
#define _mm_reduce_ps(A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), \
                                           (int)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1))
959
/* Expands to __builtin_ia32_reduceps128_mask(A, (int)B, W, U), cast to
 * __m128.
 * NOTE(review): presumably W supplies lanes whose bit in U is clear, and B
 * must be an immediate -- confirm. */
#define _mm_mask_reduce_ps(W, U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), \
                                           (int)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U)))
964
/* Zero-masked packed-single reduce on a 128-bit vector.
 * An all-zero __m128 is passed in the merge-source slot; U is forwarded as
 * the __mmask8 operand. */
#define _mm_maskz_reduce_ps(U, A, B) \
  __extension__ ({ \
    (__m128)__builtin_ia32_reduceps128_mask( \
        (__v4sf)(__m128)(A), \
        (int)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)(U)); \
  })
969
/* Unmasked packed-single reduce on a 256-bit vector.
 * A is the __m256 source and B the integer immediate; the builtin receives
 * a zero vector and an all-ones mask. */
#define _mm256_reduce_ps(A, B) \
  __extension__ ({ \
    (__m256)__builtin_ia32_reduceps256_mask( \
        (__v8sf)(__m256)(A), \
        (int)(B), \
        (__v8sf)_mm256_setzero_ps(), \
        (__mmask8)-1); \
  })
974
/* Merge-masked packed-single reduce on a 256-bit vector.
 * A is the source, B the integer immediate; W and U are forwarded as the
 * builtin's vector (merge source) and __mmask8 operands. */
#define _mm256_mask_reduce_ps(W, U, A, B) \
  __extension__ ({ \
    (__m256)__builtin_ia32_reduceps256_mask( \
        (__v8sf)(__m256)(A), \
        (int)(B), \
        (__v8sf)(__m256)(W), \
        (__mmask8)(U)); \
  })
979
/* Zero-masked packed-single reduce on a 256-bit vector.
 * An all-zero __m256 is passed in the merge-source slot; U is forwarded as
 * the __mmask8 operand. */
#define _mm256_maskz_reduce_ps(U, A, B) \
  __extension__ ({ \
    (__m256)__builtin_ia32_reduceps256_mask( \
        (__v8sf)(__m256)(A), \
        (int)(B), \
        (__v8sf)_mm256_setzero_ps(), \
        (__mmask8)(U)); \
  })
984
985static __inline__ __mmask8 __DEFAULT_FN_ATTRS
986_mm_movepi32_mask (__m128i __A)
987{
988 return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
989}
990
991static __inline__ __mmask8 __DEFAULT_FN_ATTRS
992_mm256_movepi32_mask (__m256i __A)
993{
994 return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
995}
996
997static __inline__ __m128i __DEFAULT_FN_ATTRS
998_mm_movm_epi32 (__mmask8 __A)
999{
1000 return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
1001}
1002
1003static __inline__ __m256i __DEFAULT_FN_ATTRS
1004_mm256_movm_epi32 (__mmask8 __A)
1005{
1006 return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
1007}
1008
1009static __inline__ __m128i __DEFAULT_FN_ATTRS
1010_mm_movm_epi64 (__mmask8 __A)
1011{
1012 return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
1013}
1014
1015static __inline__ __m256i __DEFAULT_FN_ATTRS
1016_mm256_movm_epi64 (__mmask8 __A)
1017{
1018 return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
1019}
1020
1021static __inline__ __mmask8 __DEFAULT_FN_ATTRS
1022_mm_movepi64_mask (__m128i __A)
1023{
1024 return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
1025}
1026
1027static __inline__ __mmask8 __DEFAULT_FN_ATTRS
1028_mm256_movepi64_mask (__m256i __A)
1029{
1030 return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
1031}
1032
1033static __inline__ __m256 __DEFAULT_FN_ATTRS
1034_mm256_broadcast_f32x2 (__m128 __A)
1035{
1036 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
1037 (__v8sf)_mm256_undefined_ps(),
1038 (__mmask8) -1);
1039}
1040
1041static __inline__ __m256 __DEFAULT_FN_ATTRS
1042_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
1043{
1044 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
1045 (__v8sf) __O,
1046 __M);
1047}
1048
1049static __inline__ __m256 __DEFAULT_FN_ATTRS
1050_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
1051{
1052 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
1053 (__v8sf) _mm256_setzero_ps (),
1054 __M);
1055}
1056
1057static __inline__ __m256d __DEFAULT_FN_ATTRS
1058_mm256_broadcast_f64x2 (__m128d __A)
1059{
1060 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
1061 (__v4df)_mm256_undefined_pd(),
1062 (__mmask8) -1);
1063}
1064
1065static __inline__ __m256d __DEFAULT_FN_ATTRS
1066_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
1067{
1068 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
1069 (__v4df) __O,
1070 __M);
1071}
1072
1073static __inline__ __m256d __DEFAULT_FN_ATTRS
1074_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
1075{
1076 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
1077 (__v4df) _mm256_setzero_ps (),
1078 __M);
1079}
1080
1081static __inline__ __m128i __DEFAULT_FN_ATTRS
1082_mm_broadcast_i32x2 (__m128i __A)
1083{
1084 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
1085 (__v4si)_mm_undefined_si128(),
1086 (__mmask8) -1);
1087}
1088
1089static __inline__ __m128i __DEFAULT_FN_ATTRS
1090_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
1091{
1092 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
1093 (__v4si) __O,
1094 __M);
1095}
1096
1097static __inline__ __m128i __DEFAULT_FN_ATTRS
1098_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1099{
1100 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
1101 (__v4si) _mm_setzero_si128 (),
1102 __M);
1103}
1104
1105static __inline__ __m256i __DEFAULT_FN_ATTRS
1106_mm256_broadcast_i32x2 (__m128i __A)
1107{
1108 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
1109 (__v8si)_mm256_undefined_si256(),
1110 (__mmask8) -1);
1111}
1112
1113static __inline__ __m256i __DEFAULT_FN_ATTRS
1114_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
1115{
1116 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
1117 (__v8si) __O,
1118 __M);
1119}
1120
1121static __inline__ __m256i __DEFAULT_FN_ATTRS
1122_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1123{
1124 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
1125 (__v8si) _mm256_setzero_si256 (),
1126 __M);
1127}
1128
1129static __inline__ __m256i __DEFAULT_FN_ATTRS
1130_mm256_broadcast_i64x2 (__m128i __A)
1131{
1132 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
1133 (__v4di)_mm256_undefined_si256(),
1134 (__mmask8) -1);
1135}
1136
1137static __inline__ __m256i __DEFAULT_FN_ATTRS
1138_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
1139{
1140 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
1141 (__v4di) __O,
1142 __M);
1143}
1144
1145static __inline__ __m256i __DEFAULT_FN_ATTRS
1146_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1147{
1148 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
1149 (__v4di) _mm256_setzero_si256 (),
1150 __M);
1151}
1152
/* Unmasked extraction of a __m128d from the __m256d A. imm is forwarded as
 * the builtin's integer immediate (the lane selector per Intel's
 * VEXTRACTF64X2 description). A zero vector and an all-ones mask are used. */
#define _mm256_extractf64x2_pd(A, imm) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_extractf64x2_256_mask( \
        (__v4df)(__m256d)(A), \
        (int)(imm), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)-1); \
  })
1158
/* Merge-masked extraction of a __m128d from the __m256d A. imm is the
 * integer immediate; W and U are forwarded as the builtin's vector (merge
 * source) and __mmask8 operands. */
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_extractf64x2_256_mask( \
        (__v4df)(__m256d)(A), \
        (int)(imm), \
        (__v2df)(__m128d)(W), \
        (__mmask8)(U)); \
  })
1164
/* Zero-masked extraction of a __m128d from the __m256d A. imm is the
 * integer immediate; an all-zero __m128d fills the merge-source slot and U
 * is forwarded as the __mmask8 operand. */
#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_extractf64x2_256_mask( \
        (__v4df)(__m256d)(A), \
        (int)(imm), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)(U)); \
  })
1170
/* Unmasked extraction of a __m128i from the __m256i A. imm is forwarded as
 * the builtin's integer immediate (the lane selector per Intel's
 * VEXTRACTI64X2 description). A zero vector and an all-ones mask are used.
 *
 * The zero vector comes from the standard _mm_setzero_si128(), matching the
 * other integer intrinsics in this header, so the expansion does not depend
 * on a non-standard helper being visible at the use site. */
#define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)-1); })
1176
/* Merge-masked extraction of a __m128i from the __m256i A. imm is the
 * integer immediate; W and U are forwarded as the builtin's vector (merge
 * source) and __mmask8 operands. */
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
  __extension__ ({ \
    (__m128i)__builtin_ia32_extracti64x2_256_mask( \
        (__v4di)(__m256i)(A), \
        (int)(imm), \
        (__v2di)(__m128i)(W), \
        (__mmask8)(U)); \
  })
1182
/* Zero-masked extraction of a __m128i from the __m256i A. imm is the
 * integer immediate; an all-zero __m128i fills the merge-source slot and U
 * is forwarded as the __mmask8 operand.
 *
 * The zero vector comes from the standard _mm_setzero_si128(), matching the
 * other integer intrinsics in this header, so the expansion does not depend
 * on a non-standard helper being visible at the use site. */
#define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)(U)); })
1188
/* Unmasked insertion of the __m128d B into the __m256d A. imm is forwarded
 * as the builtin's integer immediate (the lane selector per Intel's
 * VINSERTF64X2 description). A zero vector and an all-ones mask are used. */
#define _mm256_insertf64x2(A, B, imm) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_insertf64x2_256_mask( \
        (__v4df)(__m256d)(A), \
        (__v2df)(__m128d)(B), \
        (int)(imm), \
        (__v4df)_mm256_setzero_pd(), \
        (__mmask8)-1); \
  })
1195
/* Merge-masked insertion of the __m128d B into the __m256d A. imm is the
 * integer immediate; W and U are forwarded as the builtin's vector (merge
 * source) and __mmask8 operands. */
#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_insertf64x2_256_mask( \
        (__v4df)(__m256d)(A), \
        (__v2df)(__m128d)(B), \
        (int)(imm), \
        (__v4df)(__m256d)(W), \
        (__mmask8)(U)); \
  })
1202
/* Zero-masked insertion of the __m128d B into the __m256d A. imm is the
 * integer immediate; an all-zero __m256d fills the merge-source slot and U
 * is forwarded as the __mmask8 operand. */
#define _mm256_maskz_insertf64x2(U, A, B, imm) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_insertf64x2_256_mask( \
        (__v4df)(__m256d)(A), \
        (__v2df)(__m128d)(B), \
        (int)(imm), \
        (__v4df)_mm256_setzero_pd(), \
        (__mmask8)(U)); \
  })
1209
/* Unmasked insertion of the __m128i B into the __m256i A. imm is forwarded
 * as the builtin's integer immediate (the lane selector per Intel's
 * VINSERTI64X2 description). A zero vector and an all-ones mask are used. */
#define _mm256_inserti64x2(A, B, imm) \
  __extension__ ({ \
    (__m256i)__builtin_ia32_inserti64x2_256_mask( \
        (__v4di)(__m256i)(A), \
        (__v2di)(__m128i)(B), \
        (int)(imm), \
        (__v4di)_mm256_setzero_si256(), \
        (__mmask8)-1); \
  })
1216
/* Merge-masked insertion of the __m128i B into the __m256i A. imm is the
 * integer immediate; W and U are forwarded as the builtin's vector (merge
 * source) and __mmask8 operands. */
#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
  __extension__ ({ \
    (__m256i)__builtin_ia32_inserti64x2_256_mask( \
        (__v4di)(__m256i)(A), \
        (__v2di)(__m128i)(B), \
        (int)(imm), \
        (__v4di)(__m256i)(W), \
        (__mmask8)(U)); \
  })
1223
/* Zero-masked insertion of the __m128i B into the __m256i A. imm is the
 * integer immediate; an all-zero __m256i fills the merge-source slot and U
 * is forwarded as the __mmask8 operand. */
#define _mm256_maskz_inserti64x2(U, A, B, imm) \
  __extension__ ({ \
    (__m256i)__builtin_ia32_inserti64x2_256_mask( \
        (__v4di)(__m256i)(A), \
        (__v2di)(__m128i)(B), \
        (int)(imm), \
        (__v4di)_mm256_setzero_si256(), \
        (__mmask8)(U)); \
  })
1230
/* Masked floating-point classification of the __m128d A. imm is the integer
 * immediate handed to the builtin, U is forwarded as its __mmask8 operand,
 * and the result is an __mmask8. */
#define _mm_mask_fpclass_pd_mask(U, A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclasspd128_mask( \
        (__v2df)(__m128d)(A), \
        (int)(imm), \
        (__mmask8)(U)); \
  })
1234
/* Unmasked floating-point classification of the __m128d A. imm is the
 * integer immediate; the builtin is called with an all-ones mask and the
 * result is an __mmask8. */
#define _mm_fpclass_pd_mask(A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclasspd128_mask( \
        (__v2df)(__m128d)(A), \
        (int)(imm), \
        (__mmask8)-1); \
  })
1238
/* Masked floating-point classification of the __m256d A. imm is the integer
 * immediate handed to the builtin, U is forwarded as its __mmask8 operand,
 * and the result is an __mmask8. */
#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclasspd256_mask( \
        (__v4df)(__m256d)(A), \
        (int)(imm), \
        (__mmask8)(U)); \
  })
1242
/* Unmasked floating-point classification of the __m256d A. imm is the
 * integer immediate; the builtin is called with an all-ones mask and the
 * result is an __mmask8. */
#define _mm256_fpclass_pd_mask(A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclasspd256_mask( \
        (__v4df)(__m256d)(A), \
        (int)(imm), \
        (__mmask8)-1); \
  })
1246
/* Masked floating-point classification of the __m128 A. imm is the integer
 * immediate handed to the builtin, U is forwarded as its __mmask8 operand,
 * and the result is an __mmask8. */
#define _mm_mask_fpclass_ps_mask(U, A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclassps128_mask( \
        (__v4sf)(__m128)(A), \
        (int)(imm), \
        (__mmask8)(U)); \
  })
1250
/* Unmasked floating-point classification of the __m128 A. imm is the
 * integer immediate; the builtin is called with an all-ones mask and the
 * result is an __mmask8. */
#define _mm_fpclass_ps_mask(A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclassps128_mask( \
        (__v4sf)(__m128)(A), \
        (int)(imm), \
        (__mmask8)-1); \
  })
1254
/* Masked floating-point classification of the __m256 A. imm is the integer
 * immediate handed to the builtin, U is forwarded as its __mmask8 operand,
 * and the result is an __mmask8. */
#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclassps256_mask( \
        (__v8sf)(__m256)(A), \
        (int)(imm), \
        (__mmask8)(U)); \
  })
1258
/* Unmasked floating-point classification of the __m256 A. imm is the
 * integer immediate; the builtin is called with an all-ones mask and the
 * result is an __mmask8. */
#define _mm256_fpclass_ps_mask(A, imm) \
  __extension__ ({ \
    (__mmask8)__builtin_ia32_fpclassps256_mask( \
        (__v8sf)(__m256)(A), \
        (int)(imm), \
        (__mmask8)-1); \
  })
1262
1263#undef __DEFAULT_FN_ATTRS
1264
1265#endif