//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Regex.h"
#include <cstring>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
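// e.g. upgrading llvm.x86.sse41.ptestc first renames the stale declaration to
// "llvm.x86.sse41.ptestc.old" so a declaration with the current signature can
// take over the original name; the calls themselves are rewritten later.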

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
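// For reference, the upgrade rewrites declarations such as
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %b)
// into the current form
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64> %a, <2 x i64> %b)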
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old; replace it with the new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
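// For reference, a declaration such as
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// is upgraded so the immediate is an i8:
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8)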
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
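// For reference (sketch; argument lists elided), the AVX-512 compares changed
// along the lines of
//   declare i16 @llvm.x86.avx512.mask.cmp.ps.512(...)       ; old scalar mask
//   declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(...) ; new vXi1 mask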
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    // Added in 7.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic functions. Returns true if
// an upgrade applies.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::thread_pointer);
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" ||
               Name == "2qa" || Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }
      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("abs."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_abs_bf16)
        .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords &&
          F->getParent()->IsNewDbgInfoFormat) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embed the alignment as an attribute of
    // the pointer args.
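    // A sketch of the change (exact pointer/length type suffixes vary):
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n,
    //                                        i32 4, i1 false)
    // becomes
    //   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %d, ptr align 4 %s,
    //                                    i64 %n, i1 false)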
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
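      // e.g. llvm.nvvm.brev32 maps directly onto llvm.bitreverse.i32, and
      // llvm.nvvm.popc.i onto llvm.ctpop.i32.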
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
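      // For instance, a call like
      //   %r = call i32 @llvm.nvvm.abs.i(i32 %x)
      // is expanded (sketch; see UpgradeIntrinsicCall for the exact emission)
      // into the usual compare/negate/select idiom:
      //   %neg = sub i32 0, %x
      //   %cmp = icmp sge i32 %x, 0
      //   %r = select i1 %cmp, i32 %x, i32 %neg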
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll}
        Expand = Name == "i" || Name == "ll";
      else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
        Expand = true;
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load.add."))
        // nvvm.atomic.load.add.{f32.p,f64.p}
        Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant}
        Expand = Name.starts_with("local") || Name.starts_with("shared") ||
                 Name.starts_with("global") || Name.starts_with("constant");
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant}.to.gen
        Expand =
            (Name.consume_front("local") || Name.consume_front("shared") ||
             Name.consume_front("global") || Name.consume_front("constant")) &&
            Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = false;

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() !=
              Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

  case 'r': {
    if (Name.consume_front("riscv.")) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
               .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
               .Case("aes32esi", Intrinsic::riscv_aes32esi)
               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }
      break; // No other 'riscv.*' intrinsics
    }
  } break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::var_annotation,
          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
      return true;
    }
    break;
  }

  case 'w':
    if (Name.consume_front("wasm.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
              .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->getReturnType());
        return true;
      }

      if (Name.consume_front("dot.i8x16.i7x16.")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                 .Case("add.signed",
                       Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
                 .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other 'wasm.dot.i8x16.i7x16.*'.
      }
      break; // No other 'wasm.*'.
    }
    break;

  case 'x':
    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }

  auto *ST = dyn_cast<StructType>(F->getReturnType());
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
      F->getIntrinsicID() != Intrinsic::not_intrinsic) {
    // Replace return type with literal non-packed struct. Only do this for
    // intrinsics declared to return a struct, not for intrinsics with
    // overloaded return type, in which case the exact struct type will be
    // mangled into the name.
    SmallVector<Intrinsic::IITDescriptor> Desc;
    Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
    if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
      auto *FT = F->getFunctionType();
      auto *NewST = StructType::get(ST->getContext(), ST->elements());
      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
      std::string Name = F->getName().str();
      rename(F);
      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
                               Name, F->getParent());

      // The new function may also need remangling.
      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
        NewFn = *Result;
      return true;
    }
  }

  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != std::nullopt) {
    NewFn = *Result;
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
                                    bool CanUpgradeDebugIntrinsicsToRecords) {
  NewFn = nullptr;
  bool Upgraded =
      upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getPtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
                                      Ctor->getAggregateElement(1),
                                      Constant::getNullValue(IRB.getPtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
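// For instance (sketch), a 128-bit left shift by one byte becomes
//   shufflevector <16 x i8> zeroinitializer, <16 x i8> %op,
//                 <16 x i32> <i32 15, i32 16, ..., i32 30>
// i.e. one zero byte followed by the low 15 bytes of the operand.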
static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
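// For instance (sketch), a 128-bit right shift by one byte becomes
//   shufflevector <16 x i8> %op, <16 x i8> zeroinitializer,
//                 <16 x i32> <i32 1, i32 2, ..., i32 16>
// i.e. the high 15 bytes of the operand followed by one zero byte.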
static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
  auto *MaskTy = FixedVectorType::get(
      Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
  // i8 and we need to extract down to the right number of elements.
1618 if (NumElts <= 4) {
1619 int Indices[4];
1620 for (unsigned i = 0; i != NumElts; ++i)
1621 Indices[i] = i;
1622 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1623 "extract");
1624 }
1625
1626 return Mask;
1627}
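// Worked example (illustrative): a <4 x i32> masked operation carries its
// mask as an i8, so the i8 is bitcast to <8 x i1> and then narrowed with
// shuffle indices {0, 1, 2, 3} to the <4 x i1> a select needs; for 8 or more
// elements the bitcast alone already has the right width.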
1628
1629static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1630 Value *Op1) {
1631 // If the mask is all ones just emit the first operation.
1632 if (const auto *C = dyn_cast<Constant>(Mask))
1633 if (C->isAllOnesValue())
1634 return Op0;
1635
1636 Mask = getX86MaskVec(Builder, Mask,
1637 cast<FixedVectorType>(Op0->getType())->getNumElements());
1638 return Builder.CreateSelect(Mask, Op0, Op1);
1639}
1640
1641static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1642 Value *Op1) {
1643 // If the mask is all ones just emit the first operation.
1644 if (const auto *C = dyn_cast<Constant>(Mask))
1645 if (C->isAllOnesValue())
1646 return Op0;
1647
1648 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1649 Mask->getType()->getIntegerBitWidth());
1650 Mask = Builder.CreateBitCast(Mask, MaskTy);
1651 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1652 return Builder.CreateSelect(Mask, Op0, Op1);
1653}
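// Usage sketch (illustrative; "foo" is a placeholder intrinsic name): for
// @llvm.x86.avx512.mask.foo(%a, %b, <16 x i32> %passthru, i16 %k), the
// upgrade computes the unmasked result and wraps it as
// select(<16 x i1> bitcast(%k), %result, %passthru); the scalar variant
// applies the same idea to element 0 only.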
1654
1655// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1656// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1657// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1658 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1659 Value *Op1, Value *Shift,
1660 Value *Passthru, Value *Mask,
1661 bool IsVALIGN) {
1662 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1663
1664 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1665 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1666 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1667 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1668
1669 // Mask the immediate for VALIGN.
1670 if (IsVALIGN)
1671 ShiftVal &= (NumElts - 1);
1672
1673 // If palignr is shifting the pair of vectors more than the size of two
1674 // lanes, emit zero.
1675 if (ShiftVal >= 32)
1676 return llvm::Constant::getNullValue(Op0->getType());
1677 
1678 // If palignr is shifting the pair of input vectors more than one lane,
1679 // but less than two lanes, convert to shifting in zeroes.
1680 if (ShiftVal > 16) {
1681 ShiftVal -= 16;
1682 Op1 = Op0;
1683 Op0 = llvm::Constant::getNullValue(Op0->getType());
1684 }
1685
1686 int Indices[64];
1687 // 256-bit palignr operates on 128-bit lanes, so we need to handle that.
1688 for (unsigned l = 0; l < NumElts; l += 16) {
1689 for (unsigned i = 0; i != 16; ++i) {
1690 unsigned Idx = ShiftVal + i;
1691 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1692 Idx += NumElts - 16; // End of lane, switch operand.
1693 Indices[l + i] = Idx + l;
1694 }
1695 }
1696
1697 Value *Align = Builder.CreateShuffleVector(
1698 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1699
1700 return emitX86Select(Builder, Mask, Align, Passthru);
1701}
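// Worked example (illustrative): palignr with an immediate of 4 on 128-bit
// vectors (NumElts = 16) produces indices {4, 5, ..., 19}: bytes 4..15 of
// Op1 followed by bytes 0..3 of Op0, i.e. the concatenation Op0:Op1 shifted
// right by 4 bytes. VALIGN instead masks the immediate to NumElts - 1 and,
// having no 128-bit lanes, lets indices run straight across the two
// concatenated sources.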
1702
1703 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1704 bool ZeroMask, bool IndexForm) {
1705 Type *Ty = CI.getType();
1706 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1707 unsigned EltWidth = Ty->getScalarSizeInBits();
1708 bool IsFloat = Ty->isFPOrFPVectorTy();
1709 Intrinsic::ID IID;
1710 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1711 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1712 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1713 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1714 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1715 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1716 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1717 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1718 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1719 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1720 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1721 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1722 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1723 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1724 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1725 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1726 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1727 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1728 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1729 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1730 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1731 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1732 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1733 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1734 else if (VecWidth == 128 && EltWidth == 16)
1735 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1736 else if (VecWidth == 256 && EltWidth == 16)
1737 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1738 else if (VecWidth == 512 && EltWidth == 16)
1739 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1740 else if (VecWidth == 128 && EltWidth == 8)
1741 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1742 else if (VecWidth == 256 && EltWidth == 8)
1743 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1744 else if (VecWidth == 512 && EltWidth == 8)
1745 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1746 else
1747 llvm_unreachable("Unexpected intrinsic");
1748
1749 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1750 CI.getArgOperand(2) };
1751
1752 // If this isn't index form we need to swap operands 0 and 1.
1753 if (!IndexForm)
1754 std::swap(Args[0], Args[1]);
1755
1756 Value *V = Builder.CreateIntrinsic(IID, {}, Args);
1757 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1758 : Builder.CreateBitCast(CI.getArgOperand(1),
1759 Ty);
1760 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1761}
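// Usage sketch (illustrative): the retired
// @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %idx, <4 x i32> %a,
// <4 x i32> %b, i8 %k) maps to the unmasked
// @llvm.x86.avx512.vpermi2var.d.128(%a, %idx, %b) (operands 0 and 1 swapped
// for the T2 form), followed by emitX86Select on %k with %a as the passthru.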
1762
1763 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1764 Intrinsic::ID IID) {
1765 Type *Ty = CI.getType();
1766 Value *Op0 = CI.getOperand(0);
1767 Value *Op1 = CI.getOperand(1);
1768 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
1769
1770 if (CI.arg_size() == 4) { // For masked intrinsics.
1771 Value *VecSrc = CI.getOperand(2);
1772 Value *Mask = CI.getOperand(3);
1773 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1774 }
1775 return Res;
1776}
1777
1778 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1779 bool IsRotateRight) {
1780 Type *Ty = CI.getType();
1781 Value *Src = CI.getArgOperand(0);
1782 Value *Amt = CI.getArgOperand(1);
1783
1784 // The amount may be a scalar immediate, in which case we create a splat vector.
1785 // Funnel shift amounts are treated modulo the bit width and the types are all
1786 // power-of-2 sized, so we only care about the lowest log2 bits anyway.
1787 if (Amt->getType() != Ty) {
1788 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1789 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1790 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1791 }
1792
1793 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1794 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
1795
1796 if (CI.arg_size() == 4) { // For masked intrinsics.
1797 Value *VecSrc = CI.getOperand(2);
1798 Value *Mask = CI.getOperand(3);
1799 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1800 }
1801 return Res;
1802}
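// Worked example (illustrative): a rotate-left such as
// @llvm.x86.avx512.prol.d.128(<4 x i32> %x, i32 8) becomes
// @llvm.fshl.v4i32(%x, %x, splat(8)): funnel-shifting a value with itself is
// a rotate, and fshl/fshr already reduce the amount modulo the element width.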
1803
1804static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1805 bool IsSigned) {
1806 Type *Ty = CI.getType();
1807 Value *LHS = CI.getArgOperand(0);
1808 Value *RHS = CI.getArgOperand(1);
1809
1810 CmpInst::Predicate Pred;
1811 switch (Imm) {
1812 case 0x0:
1813 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1814 break;
1815 case 0x1:
1816 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1817 break;
1818 case 0x2:
1819 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1820 break;
1821 case 0x3:
1822 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1823 break;
1824 case 0x4:
1825 Pred = ICmpInst::ICMP_EQ;
1826 break;
1827 case 0x5:
1828 Pred = ICmpInst::ICMP_NE;
1829 break;
1830 case 0x6:
1831 return Constant::getNullValue(Ty); // FALSE
1832 case 0x7:
1833 return Constant::getAllOnesValue(Ty); // TRUE
1834 default:
1835 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1836 }
1837
1838 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1839 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1840 return Ext;
1841}
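// Worked example (illustrative): @llvm.x86.xop.vpcomltb(%a, %b) carries
// Imm = 0 with IsSigned = true, so it lowers to
// sext(icmp slt <16 x i8> %a, %b) back to <16 x i8>; immediates 6 and 7 fold
// straight to the all-zeros and all-ones vectors.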
1842
1843 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1844 bool IsShiftRight, bool ZeroMask) {
1845 Type *Ty = CI.getType();
1846 Value *Op0 = CI.getArgOperand(0);
1847 Value *Op1 = CI.getArgOperand(1);
1848 Value *Amt = CI.getArgOperand(2);
1849
1850 if (IsShiftRight)
1851 std::swap(Op0, Op1);
1852
1853 // The amount may be a scalar immediate, in which case we create a splat vector.
1854 // Funnel shift amounts are treated modulo the bit width and the types are all
1855 // power-of-2 sized, so we only care about the lowest log2 bits anyway.
1856 if (Amt->getType() != Ty) {
1857 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1858 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1859 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1860 }
1861
1862 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1863 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
1864
1865 unsigned NumArgs = CI.arg_size();
1866 if (NumArgs >= 4) { // For masked intrinsics.
1867 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1868 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1869 CI.getArgOperand(0);
1870 Value *Mask = CI.getOperand(NumArgs - 1);
1871 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1872 }
1873 return Res;
1874}
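// Worked example (illustrative): a 128-bit vpshld.q call with immediate 12
// becomes @llvm.fshl.v2i64(%a, %b, splat(12)), i.e. the high half of the
// left-shifted concatenation a:b; the vpshrd forms swap the two sources
// first so the same funnel maps onto @llvm.fshr.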
1875
1876 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
1877 Value *Mask, bool Aligned) {
1878 // Cast the pointer to the right type.
1879 Ptr = Builder.CreateBitCast(Ptr,
1880 llvm::PointerType::getUnqual(Data->getType()));
1881 const Align Alignment =
1882 Aligned
1883 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1884 : Align(1);
1885
1886 // If the mask is all ones just emit a regular store.
1887 if (const auto *C = dyn_cast<Constant>(Mask))
1888 if (C->isAllOnesValue())
1889 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1890
1891 // Convert the mask from an integer type to a vector of i1.
1892 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1893 Mask = getX86MaskVec(Builder, Mask, NumElts);
1894 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1895}
1896
1897 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
1898 Value *Passthru, Value *Mask, bool Aligned) {
1899 Type *ValTy = Passthru->getType();
1900 // Cast the pointer to the right type.
1901 Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1902 const Align Alignment =
1903 Aligned
1904 ? Align(
1905 ValTy->getPrimitiveSizeInBits().getFixedValue() /
1906 8)
1907 : Align(1);
1908
1909 // If the mask is all ones just emit a regular load.
1910 if (const auto *C = dyn_cast<Constant>(Mask))
1911 if (C->isAllOnesValue())
1912 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1913
1914 // Convert the mask from an integer type to a vector of i1.
1915 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1916 Mask = getX86MaskVec(Builder, Mask, NumElts);
1917 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1918}
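// Usage sketch (illustrative): @llvm.x86.avx512.mask.loadu.d.128(%ptr,
// <4 x i32> %passthru, i8 %k) becomes a @llvm.masked.load with alignment 1,
// the <4 x i1> mask produced by getX86MaskVec, and %passthru as the
// fallback; the aligned "load" flavor uses the full vector size as the
// alignment, and an all-ones constant mask short-circuits to a plain load.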
1919
1920static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1921 Type *Ty = CI.getType();
1922 Value *Op0 = CI.getArgOperand(0);
1923 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
1924 {Op0, Builder.getInt1(false)});
1925 if (CI.arg_size() == 3)
1926 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1927 return Res;
1928}
1929
1930static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1931 Type *Ty = CI.getType();
1932
1933 // Arguments have a vXi32 type so cast to vXi64.
1934 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1935 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1936
1937 if (IsSigned) {
1938 // Shift left then arithmetic shift right.
1939 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1940 LHS = Builder.CreateShl(LHS, ShiftAmt);
1941 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1942 RHS = Builder.CreateShl(RHS, ShiftAmt);
1943 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1944 } else {
1945 // Clear the upper bits.
1946 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1947 LHS = Builder.CreateAnd(LHS, Mask);
1948 RHS = Builder.CreateAnd(RHS, Mask);
1949 }
1950
1951 Value *Res = Builder.CreateMul(LHS, RHS);
1952
1953 if (CI.arg_size() == 4)
1954 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1955
1956 return Res;
1957}
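// Worked example (illustrative): for pmuludq the <4 x i32> sources are
// reinterpreted as <2 x i64>, each lane is ANDed with 0xffffffff so only the
// even 32-bit elements survive (zero-extended), and one 64-bit mul yields
// the 32x32->64 products; the signed pmuldq sign-extends with shl/ashr by 32
// instead.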
1958
1959 // Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
1960 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1961 Value *Mask) {
1962 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1963 if (Mask) {
1964 const auto *C = dyn_cast<Constant>(Mask);
1965 if (!C || !C->isAllOnesValue())
1966 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1967 }
1968
1969 if (NumElts < 8) {
1970 int Indices[8];
1971 for (unsigned i = 0; i != NumElts; ++i)
1972 Indices[i] = i;
1973 for (unsigned i = NumElts; i != 8; ++i)
1974 Indices[i] = NumElts + i % NumElts;
1975 Vec = Builder.CreateShuffleVector(Vec,
1976 Constant::getNullValue(Vec->getType()),
1977 Indices);
1978 }
1979 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1980}
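// Worked example (illustrative): a <4 x i1> compare result headed for an i8
// mask is widened with indices {0, 1, 2, 3, 4, 5, 6, 7}, where lanes 4..7
// read from the all-zero second shuffle operand, and the resulting <8 x i1>
// is bitcast to i8.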
1981
1982 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1983 unsigned CC, bool Signed) {
1984 Value *Op0 = CI.getArgOperand(0);
1985 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1986
1987 Value *Cmp;
1988 if (CC == 3) {
1989 Cmp = Constant::getNullValue(
1990 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1991 } else if (CC == 7) {
1992 Cmp = Constant::getAllOnesValue(
1993 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1994 } else {
1995 ICmpInst::Predicate Pred;
1996 switch (CC) {
1997 default: llvm_unreachable("Unknown condition code");
1998 case 0: Pred = ICmpInst::ICMP_EQ; break;
1999 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2000 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2001 case 4: Pred = ICmpInst::ICMP_NE; break;
2002 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2003 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2004 }
2005 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2006 }
2007
2008 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2009
2010 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2011}
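// Worked example (illustrative): an avx512.mask.cmp.d.128 call with CC = 1
// and Signed = true emits icmp slt on the vector operands, ANDs the <4 x i1>
// result with the incoming mask, and widens it to the i8 the old intrinsic
// returned; CC = 3 and CC = 7 are the constant false and true predicates
// handled above.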
2012
2013// Replace a masked intrinsic with an older unmasked intrinsic.
2014 static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2015 Intrinsic::ID IID) {
2016 Value *Rep = Builder.CreateIntrinsic(
2017 IID, {}, {CI.getArgOperand(0), CI.getArgOperand(1)});
2018 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2019}
2020
2021 static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2022 Value* A = CI.getArgOperand(0);
2023 Value* B = CI.getArgOperand(1);
2024 Value* Src = CI.getArgOperand(2);
2025 Value* Mask = CI.getArgOperand(3);
2026
2027 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2028 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2029 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2030 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2031 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2032 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2033}
2034
2035 static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2036 Value* Op = CI.getArgOperand(0);
2037 Type* ReturnOp = CI.getType();
2038 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2039 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2040 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2041}
2042
2043// Replace intrinsic with unmasked version and a select.
2044 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2045 CallBase &CI, Value *&Rep) {
2046 Name = Name.substr(12); // Remove avx512.mask.
2047
2048 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2049 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2050 Intrinsic::ID IID;
2051 if (Name.starts_with("max.p")) {
2052 if (VecWidth == 128 && EltWidth == 32)
2053 IID = Intrinsic::x86_sse_max_ps;
2054 else if (VecWidth == 128 && EltWidth == 64)
2055 IID = Intrinsic::x86_sse2_max_pd;
2056 else if (VecWidth == 256 && EltWidth == 32)
2057 IID = Intrinsic::x86_avx_max_ps_256;
2058 else if (VecWidth == 256 && EltWidth == 64)
2059 IID = Intrinsic::x86_avx_max_pd_256;
2060 else
2061 llvm_unreachable("Unexpected intrinsic");
2062 } else if (Name.starts_with("min.p")) {
2063 if (VecWidth == 128 && EltWidth == 32)
2064 IID = Intrinsic::x86_sse_min_ps;
2065 else if (VecWidth == 128 && EltWidth == 64)
2066 IID = Intrinsic::x86_sse2_min_pd;
2067 else if (VecWidth == 256 && EltWidth == 32)
2068 IID = Intrinsic::x86_avx_min_ps_256;
2069 else if (VecWidth == 256 && EltWidth == 64)
2070 IID = Intrinsic::x86_avx_min_pd_256;
2071 else
2072 llvm_unreachable("Unexpected intrinsic");
2073 } else if (Name.starts_with("pshuf.b.")) {
2074 if (VecWidth == 128)
2075 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2076 else if (VecWidth == 256)
2077 IID = Intrinsic::x86_avx2_pshuf_b;
2078 else if (VecWidth == 512)
2079 IID = Intrinsic::x86_avx512_pshuf_b_512;
2080 else
2081 llvm_unreachable("Unexpected intrinsic");
2082 } else if (Name.starts_with("pmul.hr.sw.")) {
2083 if (VecWidth == 128)
2084 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2085 else if (VecWidth == 256)
2086 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2087 else if (VecWidth == 512)
2088 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2089 else
2090 llvm_unreachable("Unexpected intrinsic");
2091 } else if (Name.starts_with("pmulh.w.")) {
2092 if (VecWidth == 128)
2093 IID = Intrinsic::x86_sse2_pmulh_w;
2094 else if (VecWidth == 256)
2095 IID = Intrinsic::x86_avx2_pmulh_w;
2096 else if (VecWidth == 512)
2097 IID = Intrinsic::x86_avx512_pmulh_w_512;
2098 else
2099 llvm_unreachable("Unexpected intrinsic");
2100 } else if (Name.starts_with("pmulhu.w.")) {
2101 if (VecWidth == 128)
2102 IID = Intrinsic::x86_sse2_pmulhu_w;
2103 else if (VecWidth == 256)
2104 IID = Intrinsic::x86_avx2_pmulhu_w;
2105 else if (VecWidth == 512)
2106 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2107 else
2108 llvm_unreachable("Unexpected intrinsic");
2109 } else if (Name.starts_with("pmaddw.d.")) {
2110 if (VecWidth == 128)
2111 IID = Intrinsic::x86_sse2_pmadd_wd;
2112 else if (VecWidth == 256)
2113 IID = Intrinsic::x86_avx2_pmadd_wd;
2114 else if (VecWidth == 512)
2115 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2116 else
2117 llvm_unreachable("Unexpected intrinsic");
2118 } else if (Name.starts_with("pmaddubs.w.")) {
2119 if (VecWidth == 128)
2120 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2121 else if (VecWidth == 256)
2122 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2123 else if (VecWidth == 512)
2124 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2125 else
2126 llvm_unreachable("Unexpected intrinsic");
2127 } else if (Name.starts_with("packsswb.")) {
2128 if (VecWidth == 128)
2129 IID = Intrinsic::x86_sse2_packsswb_128;
2130 else if (VecWidth == 256)
2131 IID = Intrinsic::x86_avx2_packsswb;
2132 else if (VecWidth == 512)
2133 IID = Intrinsic::x86_avx512_packsswb_512;
2134 else
2135 llvm_unreachable("Unexpected intrinsic");
2136 } else if (Name.starts_with("packssdw.")) {
2137 if (VecWidth == 128)
2138 IID = Intrinsic::x86_sse2_packssdw_128;
2139 else if (VecWidth == 256)
2140 IID = Intrinsic::x86_avx2_packssdw;
2141 else if (VecWidth == 512)
2142 IID = Intrinsic::x86_avx512_packssdw_512;
2143 else
2144 llvm_unreachable("Unexpected intrinsic");
2145 } else if (Name.starts_with("packuswb.")) {
2146 if (VecWidth == 128)
2147 IID = Intrinsic::x86_sse2_packuswb_128;
2148 else if (VecWidth == 256)
2149 IID = Intrinsic::x86_avx2_packuswb;
2150 else if (VecWidth == 512)
2151 IID = Intrinsic::x86_avx512_packuswb_512;
2152 else
2153 llvm_unreachable("Unexpected intrinsic");
2154 } else if (Name.starts_with("packusdw.")) {
2155 if (VecWidth == 128)
2156 IID = Intrinsic::x86_sse41_packusdw;
2157 else if (VecWidth == 256)
2158 IID = Intrinsic::x86_avx2_packusdw;
2159 else if (VecWidth == 512)
2160 IID = Intrinsic::x86_avx512_packusdw_512;
2161 else
2162 llvm_unreachable("Unexpected intrinsic");
2163 } else if (Name.starts_with("vpermilvar.")) {
2164 if (VecWidth == 128 && EltWidth == 32)
2165 IID = Intrinsic::x86_avx_vpermilvar_ps;
2166 else if (VecWidth == 128 && EltWidth == 64)
2167 IID = Intrinsic::x86_avx_vpermilvar_pd;
2168 else if (VecWidth == 256 && EltWidth == 32)
2169 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2170 else if (VecWidth == 256 && EltWidth == 64)
2171 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2172 else if (VecWidth == 512 && EltWidth == 32)
2173 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2174 else if (VecWidth == 512 && EltWidth == 64)
2175 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2176 else
2177 llvm_unreachable("Unexpected intrinsic");
2178 } else if (Name == "cvtpd2dq.256") {
2179 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2180 } else if (Name == "cvtpd2ps.256") {
2181 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2182 } else if (Name == "cvttpd2dq.256") {
2183 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2184 } else if (Name == "cvttps2dq.128") {
2185 IID = Intrinsic::x86_sse2_cvttps2dq;
2186 } else if (Name == "cvttps2dq.256") {
2187 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2188 } else if (Name.starts_with("permvar.")) {
2189 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2190 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2191 IID = Intrinsic::x86_avx2_permps;
2192 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2193 IID = Intrinsic::x86_avx2_permd;
2194 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2195 IID = Intrinsic::x86_avx512_permvar_df_256;
2196 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2197 IID = Intrinsic::x86_avx512_permvar_di_256;
2198 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2199 IID = Intrinsic::x86_avx512_permvar_sf_512;
2200 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2201 IID = Intrinsic::x86_avx512_permvar_si_512;
2202 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2203 IID = Intrinsic::x86_avx512_permvar_df_512;
2204 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2205 IID = Intrinsic::x86_avx512_permvar_di_512;
2206 else if (VecWidth == 128 && EltWidth == 16)
2207 IID = Intrinsic::x86_avx512_permvar_hi_128;
2208 else if (VecWidth == 256 && EltWidth == 16)
2209 IID = Intrinsic::x86_avx512_permvar_hi_256;
2210 else if (VecWidth == 512 && EltWidth == 16)
2211 IID = Intrinsic::x86_avx512_permvar_hi_512;
2212 else if (VecWidth == 128 && EltWidth == 8)
2213 IID = Intrinsic::x86_avx512_permvar_qi_128;
2214 else if (VecWidth == 256 && EltWidth == 8)
2215 IID = Intrinsic::x86_avx512_permvar_qi_256;
2216 else if (VecWidth == 512 && EltWidth == 8)
2217 IID = Intrinsic::x86_avx512_permvar_qi_512;
2218 else
2219 llvm_unreachable("Unexpected intrinsic");
2220 } else if (Name.starts_with("dbpsadbw.")) {
2221 if (VecWidth == 128)
2222 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2223 else if (VecWidth == 256)
2224 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2225 else if (VecWidth == 512)
2226 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2227 else
2228 llvm_unreachable("Unexpected intrinsic");
2229 } else if (Name.starts_with("pmultishift.qb.")) {
2230 if (VecWidth == 128)
2231 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2232 else if (VecWidth == 256)
2233 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2234 else if (VecWidth == 512)
2235 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2236 else
2237 llvm_unreachable("Unexpected intrinsic");
2238 } else if (Name.starts_with("conflict.")) {
2239 if (Name[9] == 'd' && VecWidth == 128)
2240 IID = Intrinsic::x86_avx512_conflict_d_128;
2241 else if (Name[9] == 'd' && VecWidth == 256)
2242 IID = Intrinsic::x86_avx512_conflict_d_256;
2243 else if (Name[9] == 'd' && VecWidth == 512)
2244 IID = Intrinsic::x86_avx512_conflict_d_512;
2245 else if (Name[9] == 'q' && VecWidth == 128)
2246 IID = Intrinsic::x86_avx512_conflict_q_128;
2247 else if (Name[9] == 'q' && VecWidth == 256)
2248 IID = Intrinsic::x86_avx512_conflict_q_256;
2249 else if (Name[9] == 'q' && VecWidth == 512)
2250 IID = Intrinsic::x86_avx512_conflict_q_512;
2251 else
2252 llvm_unreachable("Unexpected intrinsic");
2253 } else if (Name.starts_with("pavg.")) {
2254 if (Name[5] == 'b' && VecWidth == 128)
2255 IID = Intrinsic::x86_sse2_pavg_b;
2256 else if (Name[5] == 'b' && VecWidth == 256)
2257 IID = Intrinsic::x86_avx2_pavg_b;
2258 else if (Name[5] == 'b' && VecWidth == 512)
2259 IID = Intrinsic::x86_avx512_pavg_b_512;
2260 else if (Name[5] == 'w' && VecWidth == 128)
2261 IID = Intrinsic::x86_sse2_pavg_w;
2262 else if (Name[5] == 'w' && VecWidth == 256)
2263 IID = Intrinsic::x86_avx2_pavg_w;
2264 else if (Name[5] == 'w' && VecWidth == 512)
2265 IID = Intrinsic::x86_avx512_pavg_w_512;
2266 else
2267 llvm_unreachable("Unexpected intrinsic");
2268 } else
2269 return false;
2270
2271 SmallVector<Value *, 4> Args(CI.args());
2272 Args.pop_back();
2273 Args.pop_back();
2274 Rep = Builder.CreateIntrinsic(IID, {}, Args);
2275 unsigned NumArgs = CI.arg_size();
2276 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2277 CI.getArgOperand(NumArgs - 2));
2278 return true;
2279}
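// Usage sketch (illustrative): for "avx512.mask.max.ps.256" the table above
// picks @llvm.x86.avx.max.ps.256, the trailing passthru and mask arguments
// are popped off before the call, and the result is re-masked with
// emitX86Select(mask, result, passthru).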
2280
2281/// Upgrade comment in call to inline asm that represents an objc retain release
2282/// marker.
2283void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2284 size_t Pos;
2285 if (AsmStr->find("mov\tfp") == 0 &&
2286 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2287 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2288 AsmStr->replace(Pos, 1, ";");
2289 }
2290}
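// Example (illustrative): an asm string such as
//   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
// has its "#" rewritten to ";", since "#" is not a comment leader in every
// assembler dialect the string may be fed to.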
2291
2292 static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2293 Function *F, IRBuilder<> &Builder) {
2294 Value *Rep = nullptr;
2295
2296 if (Name == "abs.i" || Name == "abs.ll") {
2297 Value *Arg = CI->getArgOperand(0);
2298 Value *Neg = Builder.CreateNeg(Arg, "neg");
2299 Value *Cmp = Builder.CreateICmpSGE(
2300 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2301 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2302 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2303 Name.starts_with("atomic.load.add.f64.p")) {
2304 Value *Ptr = CI->getArgOperand(0);
2305 Value *Val = CI->getArgOperand(1);
2306 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2307 AtomicOrdering::SequentiallyConsistent);
2308 } else if (Name.consume_front("max.") &&
2309 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2310 Name == "ui" || Name == "ull")) {
2311 Value *Arg0 = CI->getArgOperand(0);
2312 Value *Arg1 = CI->getArgOperand(1);
2313 Value *Cmp = Name.starts_with("u")
2314 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2315 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2316 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2317 } else if (Name.consume_front("min.") &&
2318 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2319 Name == "ui" || Name == "ull")) {
2320 Value *Arg0 = CI->getArgOperand(0);
2321 Value *Arg1 = CI->getArgOperand(1);
2322 Value *Cmp = Name.starts_with("u")
2323 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2324 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2325 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2326 } else if (Name == "clz.ll") {
2327 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2328 Value *Arg = CI->getArgOperand(0);
2329 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2330 {Arg, Builder.getFalse()},
2331 /*FMFSource=*/nullptr, "ctlz");
2332 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2333 } else if (Name == "popc.ll") {
2334 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2335 // i64.
2336 Value *Arg = CI->getArgOperand(0);
2337 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2338 Arg, /*FMFSource=*/nullptr, "ctpop");
2339 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2340 } else if (Name == "h2f") {
2341 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2342 {Builder.getFloatTy()}, CI->getArgOperand(0),
2343 /*FMFSource=*/nullptr, "h2f");
2344 } else if (Name.consume_front("bitcast.") &&
2345 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2346 Name == "d2ll")) {
2347 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2348 } else if (Name == "rotate.b32") {
2349 Value *Arg = CI->getOperand(0);
2350 Value *ShiftAmt = CI->getOperand(1);
2351 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2352 {Arg, Arg, ShiftAmt});
2353 } else if (Name == "rotate.b64") {
2354 Type *Int64Ty = Builder.getInt64Ty();
2355 Value *Arg = CI->getOperand(0);
2356 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2357 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2358 {Arg, Arg, ZExtShiftAmt});
2359 } else if (Name == "rotate.right.b64") {
2360 Type *Int64Ty = Builder.getInt64Ty();
2361 Value *Arg = CI->getOperand(0);
2362 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2363 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2364 {Arg, Arg, ZExtShiftAmt});
2365 } else if ((Name.consume_front("ptr.gen.to.") &&
2366 (Name.starts_with("local") || Name.starts_with("shared") ||
2367 Name.starts_with("global") || Name.starts_with("constant"))) ||
2368 (Name.consume_front("ptr.") &&
2369 (Name.consume_front("local") || Name.consume_front("shared") ||
2370 Name.consume_front("global") ||
2371 Name.consume_front("constant")) &&
2372 Name.starts_with(".to.gen"))) {
2373 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2374 } else if (Name.consume_front("ldg.global")) {
2375 Value *Ptr = CI->getArgOperand(0);
2376 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2377 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2378 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2379 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2380 MDNode *MD = MDNode::get(Builder.getContext(), {});
2381 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2382 return LD;
2383 } else {
2384 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2385 if (IID != Intrinsic::not_intrinsic &&
2386 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2387 rename(F);
2388 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2389 SmallVector<Value *, 2> Args;
2390 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2391 Value *Arg = CI->getArgOperand(I);
2392 Type *OldType = Arg->getType();
2393 Type *NewType = NewFn->getArg(I)->getType();
2394 Args.push_back(
2395 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2396 ? Builder.CreateBitCast(Arg, NewType)
2397 : Arg);
2398 }
2399 Rep = Builder.CreateCall(NewFn, Args);
2400 if (F->getReturnType()->isIntegerTy())
2401 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2402 }
2403 }
2404
2405 return Rep;
2406}
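// Worked example (illustrative): @llvm.nvvm.rotate.b64(i64 %x, i32 %n)
// upgrades to @llvm.fshl.i64(%x, %x, zext %n to i64), and
// @llvm.nvvm.clz.ll(%x) to trunc(@llvm.ctlz.i64(%x, false)) because the NVVM
// form returned i32 while the generic intrinsic returns i64.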
2407
2408 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2409 IRBuilder<> &Builder) {
2410 LLVMContext &C = F->getContext();
2411 Value *Rep = nullptr;
2412
2413 if (Name.starts_with("sse4a.movnt.")) {
2414 SmallVector<Metadata *, 1> Elts;
2415 Elts.push_back(
2416 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2417 MDNode *Node = MDNode::get(C, Elts);
2418
2419 Value *Arg0 = CI->getArgOperand(0);
2420 Value *Arg1 = CI->getArgOperand(1);
2421
2422 // Nontemporal (unaligned) store of the 0'th element of the float/double
2423 // vector.
2424 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2425 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2426 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2427 Value *Extract =
2428 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2429
2430 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2431 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2432 } else if (Name.starts_with("avx.movnt.") ||
2433 Name.starts_with("avx512.storent.")) {
2434 SmallVector<Metadata *, 1> Elts;
2435 Elts.push_back(
2436 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2437 MDNode *Node = MDNode::get(C, Elts);
2438
2439 Value *Arg0 = CI->getArgOperand(0);
2440 Value *Arg1 = CI->getArgOperand(1);
2441
2442 // Convert the type of the pointer to a pointer to the stored type.
2443 Value *BC = Builder.CreateBitCast(
2444 Arg0, PointerType::getUnqual(Arg1->getType()), "cast");
2445 StoreInst *SI = Builder.CreateAlignedStore(
2446 Arg1, BC,
2447 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2448 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2449 } else if (Name == "sse2.storel.dq") {
2450 Value *Arg0 = CI->getArgOperand(0);
2451 Value *Arg1 = CI->getArgOperand(1);
2452
2453 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2454 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2455 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2456 Value *BC = Builder.CreateBitCast(
2457 Arg0, PointerType::getUnqual(Elt->getType()), "cast");
2458 Builder.CreateAlignedStore(Elt, BC, Align(1));
2459 } else if (Name.starts_with("sse.storeu.") ||
2460 Name.starts_with("sse2.storeu.") ||
2461 Name.starts_with("avx.storeu.")) {
2462 Value *Arg0 = CI->getArgOperand(0);
2463 Value *Arg1 = CI->getArgOperand(1);
2464
2465 Arg0 = Builder.CreateBitCast(Arg0, PointerType::getUnqual(Arg1->getType()),
2466 "cast");
2467 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2468 } else if (Name == "avx512.mask.store.ss") {
2469 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2470 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2471 Mask, false);
2472 } else if (Name.starts_with("avx512.mask.store")) {
2473 // "avx512.mask.storeu." or "avx512.mask.store."
2474 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2475 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2476 CI->getArgOperand(2), Aligned);
2477 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2478 // Upgrade packed integer vector compare intrinsics to compare instructions.
2479 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2480 bool CmpEq = Name[9] == 'e';
2481 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2482 CI->getArgOperand(0), CI->getArgOperand(1));
2483 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2484 } else if (Name.starts_with("avx512.broadcastm")) {
2485 Type *ExtTy = Type::getInt32Ty(C);
2486 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2487 ExtTy = Type::getInt64Ty(C);
2488 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2489 ExtTy->getPrimitiveSizeInBits();
2490 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2491 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2492 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2493 Value *Vec = CI->getArgOperand(0);
2494 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2495 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2496 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2497 } else if (Name.starts_with("avx.sqrt.p") ||
2498 Name.starts_with("sse2.sqrt.p") ||
2499 Name.starts_with("sse.sqrt.p")) {
2500 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2501 {CI->getArgOperand(0)});
2502 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2503 if (CI->arg_size() == 4 &&
2504 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2505 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2506 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2507 : Intrinsic::x86_avx512_sqrt_pd_512;
2508
2509 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2510 Rep = Builder.CreateIntrinsic(IID, {}, Args);
2511 } else {
2512 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2513 {CI->getArgOperand(0)});
2514 }
2515 Rep =
2516 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2517 } else if (Name.starts_with("avx512.ptestm") ||
2518 Name.starts_with("avx512.ptestnm")) {
2519 Value *Op0 = CI->getArgOperand(0);
2520 Value *Op1 = CI->getArgOperand(1);
2521 Value *Mask = CI->getArgOperand(2);
2522 Rep = Builder.CreateAnd(Op0, Op1);
2523 llvm::Type *Ty = Op0->getType();
2524 Value *Zero = llvm::Constant::getNullValue(Ty);
2525 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2526 ? ICmpInst::ICMP_NE
2527 : ICmpInst::ICMP_EQ;
2528 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2529 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2530 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2531 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2532 ->getNumElements();
2533 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2534 Rep =
2535 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2536 } else if (Name.starts_with("avx512.kunpck")) {
2537 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2538 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2539 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2540 int Indices[64];
2541 for (unsigned i = 0; i != NumElts; ++i)
2542 Indices[i] = i;
2543
2544 // First extract half of each vector. This gives better codegen than
2545 // doing it in a single shuffle.
2546 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2547 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2548 // Concat the vectors.
2549 // NOTE: Operands have to be swapped to match intrinsic definition.
2550 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2551 Rep = Builder.CreateBitCast(Rep, CI->getType());
2552 } else if (Name == "avx512.kand.w") {
2553 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2554 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2555 Rep = Builder.CreateAnd(LHS, RHS);
2556 Rep = Builder.CreateBitCast(Rep, CI->getType());
2557 } else if (Name == "avx512.kandn.w") {
2558 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2559 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2560 LHS = Builder.CreateNot(LHS);
2561 Rep = Builder.CreateAnd(LHS, RHS);
2562 Rep = Builder.CreateBitCast(Rep, CI->getType());
2563 } else if (Name == "avx512.kor.w") {
2564 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2565 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2566 Rep = Builder.CreateOr(LHS, RHS);
2567 Rep = Builder.CreateBitCast(Rep, CI->getType());
2568 } else if (Name == "avx512.kxor.w") {
2569 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2570 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2571 Rep = Builder.CreateXor(LHS, RHS);
2572 Rep = Builder.CreateBitCast(Rep, CI->getType());
2573 } else if (Name == "avx512.kxnor.w") {
2574 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2575 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2576 LHS = Builder.CreateNot(LHS);
2577 Rep = Builder.CreateXor(LHS, RHS);
2578 Rep = Builder.CreateBitCast(Rep, CI->getType());
2579 } else if (Name == "avx512.knot.w") {
2580 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2581 Rep = Builder.CreateNot(Rep);
2582 Rep = Builder.CreateBitCast(Rep, CI->getType());
2583 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2584 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2585 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2586 Rep = Builder.CreateOr(LHS, RHS);
2587 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2588 Value *C;
2589 if (Name[14] == 'c')
2590 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2591 else
2592 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2593 Rep = Builder.CreateICmpEQ(Rep, C);
2594 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2595 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2596 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2597 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2598 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2599 Type *I32Ty = Type::getInt32Ty(C);
2600 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2601 ConstantInt::get(I32Ty, 0));
2602 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2603 ConstantInt::get(I32Ty, 0));
2604 Value *EltOp;
2605 if (Name.contains(".add."))
2606 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2607 else if (Name.contains(".sub."))
2608 EltOp = Builder.CreateFSub(Elt0, Elt1);
2609 else if (Name.contains(".mul."))
2610 EltOp = Builder.CreateFMul(Elt0, Elt1);
2611 else
2612 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2613 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2614 ConstantInt::get(I32Ty, 0));
2615 } else if (Name.starts_with("avx512.mask.pcmp")) {
2616 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2617 bool CmpEq = Name[16] == 'e';
2618 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2619 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2620 Type *OpTy = CI->getArgOperand(0)->getType();
2621 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2622 Intrinsic::ID IID;
2623 switch (VecWidth) {
2624 default:
2625 llvm_unreachable("Unexpected intrinsic");
2626 case 128:
2627 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2628 break;
2629 case 256:
2630 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2631 break;
2632 case 512:
2633 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2634 break;
2635 }
2636
2637 Rep = Builder.CreateIntrinsic(IID, {},
2638 {CI->getOperand(0), CI->getArgOperand(1)});
2639 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2640 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2641 Type *OpTy = CI->getArgOperand(0)->getType();
2642 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2643 unsigned EltWidth = OpTy->getScalarSizeInBits();
2644 Intrinsic::ID IID;
2645 if (VecWidth == 128 && EltWidth == 32)
2646 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2647 else if (VecWidth == 256 && EltWidth == 32)
2648 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2649 else if (VecWidth == 512 && EltWidth == 32)
2650 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2651 else if (VecWidth == 128 && EltWidth == 64)
2652 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2653 else if (VecWidth == 256 && EltWidth == 64)
2654 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2655 else if (VecWidth == 512 && EltWidth == 64)
2656 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2657 else
2658 llvm_unreachable("Unexpected intrinsic");
2659
2660 Rep = Builder.CreateIntrinsic(IID, {},
2661 {CI->getOperand(0), CI->getArgOperand(1)});
2662 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2663 } else if (Name.starts_with("avx512.cmp.p")) {
2664 SmallVector<Value *, 4> Args(CI->args());
2665 Type *OpTy = Args[0]->getType();
2666 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2667 unsigned EltWidth = OpTy->getScalarSizeInBits();
2668 Intrinsic::ID IID;
2669 if (VecWidth == 128 && EltWidth == 32)
2670 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2671 else if (VecWidth == 256 && EltWidth == 32)
2672 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2673 else if (VecWidth == 512 && EltWidth == 32)
2674 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2675 else if (VecWidth == 128 && EltWidth == 64)
2676 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2677 else if (VecWidth == 256 && EltWidth == 64)
2678 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2679 else if (VecWidth == 512 && EltWidth == 64)
2680 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2681 else
2682 llvm_unreachable("Unexpected intrinsic");
2683
2684 Value *Mask = Constant::getAllOnesValue(Builder.getIntNTy(VecWidth / EltWidth));
2685 if (VecWidth == 512)
2686 std::swap(Mask, Args.back());
2687 Args.push_back(Mask);
2688
2689 Rep = Builder.CreateIntrinsic(IID, {}, Args);
2690 } else if (Name.starts_with("avx512.mask.cmp.")) {
2691 // Integer compare intrinsics.
2692 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2693 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2694 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2695 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2696 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2697 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2698 Name.starts_with("avx512.cvtw2mask.") ||
2699 Name.starts_with("avx512.cvtd2mask.") ||
2700 Name.starts_with("avx512.cvtq2mask.")) {
2701 Value *Op = CI->getArgOperand(0);
2702 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2703 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2704 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2705 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2706 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2707 Name.starts_with("avx512.mask.pabs")) {
2708 Rep = upgradeAbs(Builder, *CI);
2709 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2710 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2711 Name.starts_with("avx512.mask.pmaxs")) {
2712 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2713 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2714 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2715 Name.starts_with("avx512.mask.pmaxu")) {
2716 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2717 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2718 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2719 Name.starts_with("avx512.mask.pmins")) {
2720 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2721 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2722 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2723 Name.starts_with("avx512.mask.pminu")) {
2724 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2725 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2726 Name == "avx512.pmulu.dq.512" ||
2727 Name.starts_with("avx512.mask.pmulu.dq.")) {
2728 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2729 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2730 Name == "avx512.pmul.dq.512" ||
2731 Name.starts_with("avx512.mask.pmul.dq.")) {
2732 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2733 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2734 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2735 Rep =
2736 Builder.CreateSIToFP(CI->getArgOperand(1),
2737 cast<VectorType>(CI->getType())->getElementType());
2738 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2739 } else if (Name == "avx512.cvtusi2sd") {
2740 Rep =
2741 Builder.CreateUIToFP(CI->getArgOperand(1),
2742 cast<VectorType>(CI->getType())->getElementType());
2743 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2744 } else if (Name == "sse2.cvtss2sd") {
2745 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2746 Rep = Builder.CreateFPExt(
2747 Rep, cast<VectorType>(CI->getType())->getElementType());
2748 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2749 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2750 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2751 Name.starts_with("avx512.mask.cvtdq2pd.") ||
2752 Name.starts_with("avx512.mask.cvtudq2pd.") ||
2753 Name.starts_with("avx512.mask.cvtdq2ps.") ||
2754 Name.starts_with("avx512.mask.cvtudq2ps.") ||
2755 Name.starts_with("avx512.mask.cvtqq2pd.") ||
2756 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2757 Name == "avx512.mask.cvtqq2ps.256" ||
2758 Name == "avx512.mask.cvtqq2ps.512" ||
2759 Name == "avx512.mask.cvtuqq2ps.256" ||
2760 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2761 Name == "avx.cvt.ps2.pd.256" ||
2762 Name == "avx512.mask.cvtps2pd.128" ||
2763 Name == "avx512.mask.cvtps2pd.256") {
2764 auto *DstTy = cast<FixedVectorType>(CI->getType());
2765 Rep = CI->getArgOperand(0);
2766 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2767
2768 unsigned NumDstElts = DstTy->getNumElements();
2769 if (NumDstElts < SrcTy->getNumElements()) {
2770 assert(NumDstElts == 2 && "Unexpected vector size");
2771 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2772 }
2773
2774 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2775 bool IsUnsigned = Name.contains("cvtu");
2776 if (IsPS2PD)
2777 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2778 else if (CI->arg_size() == 4 &&
2779 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2780 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2781 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2782 : Intrinsic::x86_avx512_sitofp_round;
2783 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
2784 {Rep, CI->getArgOperand(3)});
2785 } else {
2786 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2787 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2788 }
2789
2790 if (CI->arg_size() >= 3)
2791 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2792 CI->getArgOperand(1));
2793 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2794 Name.starts_with("vcvtph2ps.")) {
2795 auto *DstTy = cast<FixedVectorType>(CI->getType());
2796 Rep = CI->getArgOperand(0);
2797 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2798 unsigned NumDstElts = DstTy->getNumElements();
2799 if (NumDstElts != SrcTy->getNumElements()) {
2800 assert(NumDstElts == 4 && "Unexpected vector size");
2801 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2802 }
2803 Rep = Builder.CreateBitCast(
2804 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2805 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2806 if (CI->arg_size() >= 3)
2807 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2808 CI->getArgOperand(1));
2809 } else if (Name.starts_with("avx512.mask.load")) {
2810 // "avx512.mask.loadu." or "avx512.mask.load."
2811 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2812 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2813 CI->getArgOperand(2), Aligned);
2814 } else if (Name.starts_with("avx512.mask.expand.load.")) {
2815 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2816 Type *PtrTy = ResultTy->getElementType();
2817
2818 // Cast the pointer to element type.
2819 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2820 llvm::PointerType::getUnqual(PtrTy));
2821 
2822 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2823 ResultTy->getNumElements());
2824
2825 Rep = Builder.CreateIntrinsic(Intrinsic::masked_expandload, ResultTy,
2826 {Ptr, MaskVec, CI->getOperand(1)});
2827 } else if (Name.starts_with("avx512.mask.compress.store.")) {
2828 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2829 Type *PtrTy = ResultTy->getElementType();
2830
2831 // Cast the pointer to element type.
2832 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2833 llvm::PointerType::getUnqual(PtrTy));
2834 
2835 Value *MaskVec =
2836 getX86MaskVec(Builder, CI->getArgOperand(2),
2837 cast<FixedVectorType>(ResultTy)->getNumElements());
2838
2839 Rep = Builder.CreateIntrinsic(Intrinsic::masked_compressstore, ResultTy,
2840 {CI->getArgOperand(1), Ptr, MaskVec});
2841 } else if (Name.starts_with("avx512.mask.compress.") ||
2842 Name.starts_with("avx512.mask.expand.")) {
2843 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2844
2845 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2846 ResultTy->getNumElements());
2847
2848 bool IsCompress = Name[12] == 'c';
2849 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2850 : Intrinsic::x86_avx512_mask_expand;
2851 Rep = Builder.CreateIntrinsic(
2852 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
2853 } else if (Name.starts_with("xop.vpcom")) {
2854 bool IsSigned;
2855 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2856 Name.ends_with("uq"))
2857 IsSigned = false;
2858 else if (Name.ends_with("b") || Name.ends_with("w") ||
2859 Name.ends_with("d") || Name.ends_with("q"))
2860 IsSigned = true;
2861 else
2862 llvm_unreachable("Unknown suffix");
2863
2864 unsigned Imm;
2865 if (CI->arg_size() == 3) {
2866 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2867 } else {
2868 Name = Name.substr(9); // strip off "xop.vpcom"
2869 if (Name.starts_with("lt"))
2870 Imm = 0;
2871 else if (Name.starts_with("le"))
2872 Imm = 1;
2873 else if (Name.starts_with("gt"))
2874 Imm = 2;
2875 else if (Name.starts_with("ge"))
2876 Imm = 3;
2877 else if (Name.starts_with("eq"))
2878 Imm = 4;
2879 else if (Name.starts_with("ne"))
2880 Imm = 5;
2881 else if (Name.starts_with("false"))
2882 Imm = 6;
2883 else if (Name.starts_with("true"))
2884 Imm = 7;
2885 else
2886 llvm_unreachable("Unknown condition");
2887 }
2888
2889 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2890 } else if (Name.starts_with("xop.vpcmov")) {
2891 Value *Sel = CI->getArgOperand(2);
2892 Value *NotSel = Builder.CreateNot(Sel);
2893 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2894 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2895 Rep = Builder.CreateOr(Sel0, Sel1);
2896 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
2897 Name.starts_with("avx512.mask.prol")) {
2898 Rep = upgradeX86Rotate(Builder, *CI, false);
2899 } else if (Name.starts_with("avx512.pror") ||
2900 Name.starts_with("avx512.mask.pror")) {
2901 Rep = upgradeX86Rotate(Builder, *CI, true);
2902 } else if (Name.starts_with("avx512.vpshld.") ||
2903 Name.starts_with("avx512.mask.vpshld") ||
2904 Name.starts_with("avx512.maskz.vpshld")) {
2905 bool ZeroMask = Name[11] == 'z';
2906 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2907 } else if (Name.starts_with("avx512.vpshrd.") ||
2908 Name.starts_with("avx512.mask.vpshrd") ||
2909 Name.starts_with("avx512.maskz.vpshrd")) {
2910 bool ZeroMask = Name[11] == 'z';
2911 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2912 } else if (Name == "sse42.crc32.64.8") {
2913 Value *Trunc0 =
2914 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2915 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8, {},
2916 {Trunc0, CI->getArgOperand(1)});
2917 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2918 } else if (Name.starts_with("avx.vbroadcast.s") ||
2919 Name.starts_with("avx512.vbroadcast.s")) {
2920 // Replace broadcasts with a series of insertelements.
2921 auto *VecTy = cast<FixedVectorType>(CI->getType());
2922 Type *EltTy = VecTy->getElementType();
2923 unsigned EltNum = VecTy->getNumElements();
2924 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2925 Type *I32Ty = Type::getInt32Ty(C);
2926 Rep = PoisonValue::get(VecTy);
2927 for (unsigned I = 0; I < EltNum; ++I)
2928 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
2929 } else if (Name.starts_with("sse41.pmovsx") ||
2930 Name.starts_with("sse41.pmovzx") ||
2931 Name.starts_with("avx2.pmovsx") ||
2932 Name.starts_with("avx2.pmovzx") ||
2933 Name.starts_with("avx512.mask.pmovsx") ||
2934 Name.starts_with("avx512.mask.pmovzx")) {
2935 auto *DstTy = cast<FixedVectorType>(CI->getType());
2936 unsigned NumDstElts = DstTy->getNumElements();
2937
2938 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2939 SmallVector<int, 8> ShuffleMask(NumDstElts);
2940 for (unsigned i = 0; i != NumDstElts; ++i)
2941 ShuffleMask[i] = i;
2942
2943 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2944
2945 bool DoSext = Name.contains("pmovsx");
2946 Rep =
2947 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
2948 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2949 if (CI->arg_size() == 3)
2950 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2951 CI->getArgOperand(1));
2952 } else if (Name == "avx512.mask.pmov.qd.256" ||
2953 Name == "avx512.mask.pmov.qd.512" ||
2954 Name == "avx512.mask.pmov.wb.256" ||
2955 Name == "avx512.mask.pmov.wb.512") {
2956 Type *Ty = CI->getArgOperand(1)->getType();
2957 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2958 Rep =
2959 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2960 } else if (Name.starts_with("avx.vbroadcastf128") ||
2961 Name == "avx2.vbroadcasti128") {
2962 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2963 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2964 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2965 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2966 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2967 PointerType::getUnqual(VT));
2968 Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2969 if (NumSrcElts == 2)
2970 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2971 else
2972 Rep = Builder.CreateShuffleVector(Load,
2973 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2974 } else if (Name.starts_with("avx512.mask.shuf.i") ||
2975 Name.starts_with("avx512.mask.shuf.f")) {
2976 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2977 Type *VT = CI->getType();
2978 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2979 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2980 unsigned ControlBitsMask = NumLanes - 1;
2981 unsigned NumControlBits = NumLanes / 2;
2982 SmallVector<int, 8> ShuffleMask(0);
2983
2984 for (unsigned l = 0; l != NumLanes; ++l) {
2985 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2986 // We actually need the other source.
2987 if (l >= NumLanes / 2)
2988 LaneMask += NumLanes;
2989 for (unsigned i = 0; i != NumElementsInLane; ++i)
2990 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2991 }
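// Illustrative example: shuf.i64x2.512 (NumLanes = 4, two i64 per lane)
// with Imm = 0x44 takes lanes 0 and 1 from each source, i.e. the mask
// <0,1,2,3, 8,9,10,11>.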
2992 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2993 CI->getArgOperand(1), ShuffleMask);
2994 Rep =
2995 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
2996 } else if (Name.starts_with("avx512.mask.broadcastf") ||
2997 Name.starts_with("avx512.mask.broadcasti")) {
2998 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2999 ->getNumElements();
3000 unsigned NumDstElts =
3001 cast<FixedVectorType>(CI->getType())->getNumElements();
3002
3003 SmallVector<int, 8> ShuffleMask(NumDstElts);
3004 for (unsigned i = 0; i != NumDstElts; ++i)
3005 ShuffleMask[i] = i % NumSrcElts;
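// Illustrative example: broadcasting a 4-element source into a 16-element
// destination repeats <0,1,2,3> four times.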
3006
3007 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3008 CI->getArgOperand(0), ShuffleMask);
3009 Rep =
3010 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3011 } else if (Name.starts_with("avx2.pbroadcast") ||
3012 Name.starts_with("avx2.vbroadcast") ||
3013 Name.starts_with("avx512.pbroadcast") ||
3014 Name.starts_with("avx512.mask.broadcast.s")) {
3015 // Replace vp?broadcasts with a vector shuffle.
3016 Value *Op = CI->getArgOperand(0);
3017 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3018 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3019 SmallVector<int, 8> M;
3020 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3021 Rep = Builder.CreateShuffleVector(Op, M);
3022
3023 if (CI->arg_size() == 3)
3024 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3025 CI->getArgOperand(1));
3026 } else if (Name.starts_with("sse2.padds.") ||
3027 Name.starts_with("avx2.padds.") ||
3028 Name.starts_with("avx512.padds.") ||
3029 Name.starts_with("avx512.mask.padds.")) {
3030 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3031 } else if (Name.starts_with("sse2.psubs.") ||
3032 Name.starts_with("avx2.psubs.") ||
3033 Name.starts_with("avx512.psubs.") ||
3034 Name.starts_with("avx512.mask.psubs.")) {
3035 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3036 } else if (Name.starts_with("sse2.paddus.") ||
3037 Name.starts_with("avx2.paddus.") ||
3038 Name.starts_with("avx512.mask.paddus.")) {
3039 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3040 } else if (Name.starts_with("sse2.psubus.") ||
3041 Name.starts_with("avx2.psubus.") ||
3042 Name.starts_with("avx512.mask.psubus.")) {
3043 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3044 } else if (Name.starts_with("avx512.mask.palignr.")) {
3045 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3046 CI->getArgOperand(1), CI->getArgOperand(2),
3047 CI->getArgOperand(3), CI->getArgOperand(4),
3048 false);
3049 } else if (Name.starts_with("avx512.mask.valign.")) {
3050 Rep = upgradeX86ALIGNIntrinsics(
3051 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3052 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3053 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3054 // 128/256-bit shift left specified in bits.
3055 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3056 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3057 Shift / 8); // Shift is in bits.
3058 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3059 // 128/256-bit shift right specified in bits.
3060 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3061 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3062 Shift / 8); // Shift is in bits.
3063 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3064 Name == "avx512.psll.dq.512") {
3065 // 128/256/512-bit shift left specified in bytes.
3066 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3067 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3068 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3069 Name == "avx512.psrl.dq.512") {
3070 // 128/256/512-bit shift right specified in bytes.
3071 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3072 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3073 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3074 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3075 Name.starts_with("avx2.pblendd.")) {
3076 Value *Op0 = CI->getArgOperand(0);
3077 Value *Op1 = CI->getArgOperand(1);
3078 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3079 auto *VecTy = cast<FixedVectorType>(CI->getType());
3080 unsigned NumElts = VecTy->getNumElements();
3081
3082 SmallVector<int, 16> Idxs(NumElts);
3083 for (unsigned i = 0; i != NumElts; ++i)
3084 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
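// Illustrative example: sse41.pblendw (v8i16) with Imm = 0xAA takes the odd
// lanes from Op1: <0,9,2,11,4,13,6,15>.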
3085
3086 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3087 } else if (Name.starts_with("avx.vinsertf128.") ||
3088 Name == "avx2.vinserti128" ||
3089 Name.starts_with("avx512.mask.insert")) {
3090 Value *Op0 = CI->getArgOperand(0);
3091 Value *Op1 = CI->getArgOperand(1);
3092 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3093 unsigned DstNumElts =
3094 cast<FixedVectorType>(CI->getType())->getNumElements();
3095 unsigned SrcNumElts =
3096 cast<FixedVectorType>(Op1->getType())->getNumElements();
3097 unsigned Scale = DstNumElts / SrcNumElts;
3098
3099 // Mask off the high bits of the immediate value; hardware ignores those.
3100 Imm = Imm % Scale;
3101
3102 // Extend the second operand into a vector the size of the destination.
3103 SmallVector<int, 8> Idxs(DstNumElts);
3104 for (unsigned i = 0; i != SrcNumElts; ++i)
3105 Idxs[i] = i;
3106 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3107 Idxs[i] = SrcNumElts;
3108 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3109
3110 // Insert the second operand into the first operand.
3111
3112 // Note that there is no guarantee that instruction lowering will actually
3113 // produce a vinsertf128 instruction for the created shuffles. In
3114 // particular, the 0 immediate case involves no lane changes, so it can
3115 // be handled as a blend.
3116
3117 // Example of shuffle mask for 32-bit elements:
3118 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3119 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3120
3121 // First fill with identity mask.
3122 for (unsigned i = 0; i != DstNumElts; ++i)
3123 Idxs[i] = i;
3124 // Then replace the elements where we need to insert.
3125 for (unsigned i = 0; i != SrcNumElts; ++i)
3126 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3127 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3128
3129 // If the intrinsic has a mask operand, handle that.
3130 if (CI->arg_size() == 5)
3131 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3132 CI->getArgOperand(3));
3133 } else if (Name.starts_with("avx.vextractf128.") ||
3134 Name == "avx2.vextracti128" ||
3135 Name.starts_with("avx512.mask.vextract")) {
3136 Value *Op0 = CI->getArgOperand(0);
3137 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3138 unsigned DstNumElts =
3139 cast<FixedVectorType>(CI->getType())->getNumElements();
3140 unsigned SrcNumElts =
3141 cast<FixedVectorType>(Op0->getType())->getNumElements();
3142 unsigned Scale = SrcNumElts / DstNumElts;
3143
3144 // Mask off the high bits of the immediate value; hardware ignores those.
3145 Imm = Imm % Scale;
3146
3147 // Get indexes for the subvector of the input vector.
3148 SmallVector<int, 8> Idxs(DstNumElts);
3149 for (unsigned i = 0; i != DstNumElts; ++i) {
3150 Idxs[i] = i + (Imm * DstNumElts);
3151 }
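// Illustrative example: avx.vextractf128 of a v8f32 with Imm = 1 extracts
// the upper half, i.e. indices <4,5,6,7>.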
3152 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3153
3154 // If the intrinsic has a mask operand, handle that.
3155 if (CI->arg_size() == 4)
3156 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3157 CI->getArgOperand(2));
3158 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3159 Name.starts_with("avx512.mask.perm.di.")) {
3160 Value *Op0 = CI->getArgOperand(0);
3161 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3162 auto *VecTy = cast<FixedVectorType>(CI->getType());
3163 unsigned NumElts = VecTy->getNumElements();
3164
3165 SmallVector<int, 8> Idxs(NumElts);
3166 for (unsigned i = 0; i != NumElts; ++i)
3167 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
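// Illustrative example: avx512.mask.perm.df.256 (v4f64) with Imm = 0x1B
// reverses the vector: <3,2,1,0>.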
3168
3169 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3170
3171 if (CI->arg_size() == 4)
3172 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3173 CI->getArgOperand(2));
3174 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3175 // The immediate permute control byte looks like this:
3176 // [1:0] - select 128 bits from sources for low half of destination
3177 // [2] - ignore
3178 // [3] - zero low half of destination
3179 // [5:4] - select 128 bits from sources for high half of destination
3180 // [6] - ignore
3181 // [7] - zero high half of destination
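// Illustrative example: with 8 x float sources and Imm = 0x21, the low half
// comes from the high 128 bits of operand 0 and the high half from the low
// 128 bits of operand 1, i.e. the mask <4,5,6,7, 8,9,10,11>.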
3182
3183 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3184
3185 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3186 unsigned HalfSize = NumElts / 2;
3187 SmallVector<int, 8> ShuffleMask(NumElts);
3188
3189 // Determine which operand(s) are actually in use for this instruction.
3190 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3191 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3192
3193 // If needed, replace operands based on zero mask.
3194 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3195 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3196
3197 // Permute low half of result.
3198 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3199 for (unsigned i = 0; i < HalfSize; ++i)
3200 ShuffleMask[i] = StartIndex + i;
3201
3202 // Permute high half of result.
3203 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3204 for (unsigned i = 0; i < HalfSize; ++i)
3205 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3206
3207 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3208
3209 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3210 Name.starts_with("avx512.mask.vpermil.p") ||
3211 Name.starts_with("avx512.mask.pshuf.d.")) {
3212 Value *Op0 = CI->getArgOperand(0);
3213 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3214 auto *VecTy = cast<FixedVectorType>(CI->getType());
3215 unsigned NumElts = VecTy->getNumElements();
3216 // Calculate the size of each index in the immediate.
3217 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3218 unsigned IdxMask = ((1 << IdxSize) - 1);
3219
3220 SmallVector<int, 8> Idxs(NumElts);
3221 // Look up the bits for this element, wrapping around the immediate every
3222 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3223 // to offset by the first index of each group.
3224 for (unsigned i = 0; i != NumElts; ++i)
3225 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
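// Illustrative example: sse2.pshuf.d (v4i32, IdxSize = 2) with Imm = 0x1B
// yields <3,2,1,0>.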
3226
3227 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3228
3229 if (CI->arg_size() == 4)
3230 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3231 CI->getArgOperand(2));
3232 } else if (Name == "sse2.pshufl.w" ||
3233 Name.starts_with("avx512.mask.pshufl.w.")) {
3234 Value *Op0 = CI->getArgOperand(0);
3235 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3236 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3237
3238 SmallVector<int, 16> Idxs(NumElts);
3239 for (unsigned l = 0; l != NumElts; l += 8) {
3240 for (unsigned i = 0; i != 4; ++i)
3241 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3242 for (unsigned i = 4; i != 8; ++i)
3243 Idxs[i + l] = i + l;
3244 }
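// Illustrative example: sse2.pshufl.w with Imm = 0x1B reverses the low four
// words and keeps the high four: <3,2,1,0, 4,5,6,7>.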
3245
3246 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3247
3248 if (CI->arg_size() == 4)
3249 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3250 CI->getArgOperand(2));
3251 } else if (Name == "sse2.pshufh.w" ||
3252 Name.starts_with("avx512.mask.pshufh.w.")) {
3253 Value *Op0 = CI->getArgOperand(0);
3254 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3255 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3256
3257 SmallVector<int, 16> Idxs(NumElts);
3258 for (unsigned l = 0; l != NumElts; l += 8) {
3259 for (unsigned i = 0; i != 4; ++i)
3260 Idxs[i + l] = i + l;
3261 for (unsigned i = 0; i != 4; ++i)
3262 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3263 }
3264
3265 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3266
3267 if (CI->arg_size() == 4)
3268 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3269 CI->getArgOperand(2));
3270 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3271 Value *Op0 = CI->getArgOperand(0);
3272 Value *Op1 = CI->getArgOperand(1);
3273 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3274 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3275
3276 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3277 unsigned HalfLaneElts = NumLaneElts / 2;
3278
3279 SmallVector<int, 16> Idxs(NumElts);
3280 for (unsigned i = 0; i != NumElts; ++i) {
3281 // Base index is the starting element of the lane.
3282 Idxs[i] = i - (i % NumLaneElts);
3283 // If we are halfway through the lane, switch to the other source.
3284 if ((i % NumLaneElts) >= HalfLaneElts)
3285 Idxs[i] += NumElts;
3286 // Now select the specific element by adding HalfLaneElts bits from the
3287 // immediate, wrapping around the immediate every 8 bits.
3288 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3289 }
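// Illustrative example: for a v4f32 shufps with Imm = 0x1B this picks two
// elements from each source: <3,2, 5,4>.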
3290
3291 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3292
3293 Rep =
3294 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3295 } else if (Name.starts_with("avx512.mask.movddup") ||
3296 Name.starts_with("avx512.mask.movshdup") ||
3297 Name.starts_with("avx512.mask.movsldup")) {
3298 Value *Op0 = CI->getArgOperand(0);
3299 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3300 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3301
3302 unsigned Offset = 0;
3303 if (Name.starts_with("avx512.mask.movshdup."))
3304 Offset = 1;
3305
3306 SmallVector<int, 16> Idxs(NumElts);
3307 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3308 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3309 Idxs[i + l + 0] = i + l + Offset;
3310 Idxs[i + l + 1] = i + l + Offset;
3311 }
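// Illustrative example: on v4f32, movshdup (Offset = 1) yields <1,1,3,3>
// and movsldup (Offset = 0) yields <0,0,2,2>.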
3312
3313 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3314
3315 Rep =
3316 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3317 } else if (Name.starts_with("avx512.mask.punpckl") ||
3318 Name.starts_with("avx512.mask.unpckl.")) {
3319 Value *Op0 = CI->getArgOperand(0);
3320 Value *Op1 = CI->getArgOperand(1);
3321 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3322 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3323
3324 SmallVector<int, 64> Idxs(NumElts);
3325 for (int l = 0; l != NumElts; l += NumLaneElts)
3326 for (int i = 0; i != NumLaneElts; ++i)
3327 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
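// Illustrative example: for 32-bit elements each 128-bit lane interleaves
// as in unpcklps; lane 0 becomes <0, NumElts+0, 1, NumElts+1>.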
3328
3329 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3330
3331 Rep =
3332 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3333 } else if (Name.starts_with("avx512.mask.punpckh") ||
3334 Name.starts_with("avx512.mask.unpckh.")) {
3335 Value *Op0 = CI->getArgOperand(0);
3336 Value *Op1 = CI->getArgOperand(1);
3337 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3338 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3339
3340 SmallVector<int, 64> Idxs(NumElts);
3341 for (int l = 0; l != NumElts; l += NumLaneElts)
3342 for (int i = 0; i != NumLaneElts; ++i)
3343 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3344
3345 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3346
3347 Rep =
3348 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3349 } else if (Name.starts_with("avx512.mask.and.") ||
3350 Name.starts_with("avx512.mask.pand.")) {
3351 VectorType *FTy = cast<VectorType>(CI->getType());
3352 VectorType *ITy = VectorType::getInteger(FTy);
3353 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3354 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3355 Rep = Builder.CreateBitCast(Rep, FTy);
3356 Rep =
3357 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3358 } else if (Name.starts_with("avx512.mask.andn.") ||
3359 Name.starts_with("avx512.mask.pandn.")) {
3360 VectorType *FTy = cast<VectorType>(CI->getType());
3361 VectorType *ITy = VectorType::getInteger(FTy);
3362 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3363 Rep = Builder.CreateAnd(Rep,
3364 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3365 Rep = Builder.CreateBitCast(Rep, FTy);
3366 Rep =
3367 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3368 } else if (Name.starts_with("avx512.mask.or.") ||
3369 Name.starts_with("avx512.mask.por.")) {
3370 VectorType *FTy = cast<VectorType>(CI->getType());
3371 VectorType *ITy = VectorType::getInteger(FTy);
3372 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3373 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3374 Rep = Builder.CreateBitCast(Rep, FTy);
3375 Rep =
3376 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3377 } else if (Name.starts_with("avx512.mask.xor.") ||
3378 Name.starts_with("avx512.mask.pxor.")) {
3379 VectorType *FTy = cast<VectorType>(CI->getType());
3380 VectorType *ITy = VectorType::getInteger(FTy);
3381 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3382 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3383 Rep = Builder.CreateBitCast(Rep, FTy);
3384 Rep =
3385 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3386 } else if (Name.starts_with("avx512.mask.padd.")) {
3387 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3388 Rep =
3389 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3390 } else if (Name.starts_with("avx512.mask.psub.")) {
3391 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3392 Rep =
3393 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3394 } else if (Name.starts_with("avx512.mask.pmull.")) {
3395 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3396 Rep =
3397 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3398 } else if (Name.starts_with("avx512.mask.add.p")) {
3399 if (Name.ends_with(".512")) {
3400 Intrinsic::ID IID;
3401 if (Name[17] == 's')
3402 IID = Intrinsic::x86_avx512_add_ps_512;
3403 else
3404 IID = Intrinsic::x86_avx512_add_pd_512;
3405
3406 Rep = Builder.CreateIntrinsic(
3407 IID, {},
3408 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3409 } else {
3410 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3411 }
3412 Rep =
3413 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3414 } else if (Name.starts_with("avx512.mask.div.p")) {
3415 if (Name.ends_with(".512")) {
3416 Intrinsic::ID IID;
3417 if (Name[17] == 's')
3418 IID = Intrinsic::x86_avx512_div_ps_512;
3419 else
3420 IID = Intrinsic::x86_avx512_div_pd_512;
3421
3422 Rep = Builder.CreateIntrinsic(
3423 IID, {},
3424 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3425 } else {
3426 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3427 }
3428 Rep =
3429 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3430 } else if (Name.starts_with("avx512.mask.mul.p")) {
3431 if (Name.ends_with(".512")) {
3432 Intrinsic::ID IID;
3433 if (Name[17] == 's')
3434 IID = Intrinsic::x86_avx512_mul_ps_512;
3435 else
3436 IID = Intrinsic::x86_avx512_mul_pd_512;
3437
3438 Rep = Builder.CreateIntrinsic(
3439 IID, {},
3440 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3441 } else {
3442 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3443 }
3444 Rep =
3445 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3446 } else if (Name.starts_with("avx512.mask.sub.p")) {
3447 if (Name.ends_with(".512")) {
3448 Intrinsic::ID IID;
3449 if (Name[17] == 's')
3450 IID = Intrinsic::x86_avx512_sub_ps_512;
3451 else
3452 IID = Intrinsic::x86_avx512_sub_pd_512;
3453
3454 Rep = Builder.CreateIntrinsic(
3455 IID, {},
3456 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3457 } else {
3458 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3459 }
3460 Rep =
3461 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3462 } else if ((Name.starts_with("avx512.mask.max.p") ||
3463 Name.starts_with("avx512.mask.min.p")) &&
3464 Name.drop_front(18) == ".512") {
3465 bool IsDouble = Name[17] == 'd';
3466 bool IsMin = Name[13] == 'i';
3467 static const Intrinsic::ID MinMaxTbl[2][2] = {
3468 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3469 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3470 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3471
3472 Rep = Builder.CreateIntrinsic(
3473 IID, {},
3474 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3475 Rep =
3476 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3477 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3478 Rep =
3479 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3480 {CI->getArgOperand(0), Builder.getInt1(false)});
3481 Rep =
3482 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3483 } else if (Name.starts_with("avx512.mask.psll")) {
3484 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3485 bool IsVariable = Name[16] == 'v';
3486 char Size = Name[16] == '.' ? Name[17]
3487 : Name[17] == '.' ? Name[18]
3488 : Name[18] == '.' ? Name[19]
3489 : Name[20];
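// Illustrative example: in "avx512.mask.psll.d.128", Name[16] is '.' so
// Size is 'd'; in "avx512.mask.psllv2.di", Name[16] is 'v' and the
// variable-shift cases below apply. The psrl/psra blocks parse names the
// same way.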
3490
3491 Intrinsic::ID IID;
3492 if (IsVariable && Name[17] != '.') {
3493 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3494 IID = Intrinsic::x86_avx2_psllv_q;
3495 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3496 IID = Intrinsic::x86_avx2_psllv_q_256;
3497 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3498 IID = Intrinsic::x86_avx2_psllv_d;
3499 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3500 IID = Intrinsic::x86_avx2_psllv_d_256;
3501 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3502 IID = Intrinsic::x86_avx512_psllv_w_128;
3503 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3504 IID = Intrinsic::x86_avx512_psllv_w_256;
3505 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3506 IID = Intrinsic::x86_avx512_psllv_w_512;
3507 else
3508 llvm_unreachable("Unexpected size");
3509 } else if (Name.ends_with(".128")) {
3510 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3511 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3512 : Intrinsic::x86_sse2_psll_d;
3513 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3514 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3515 : Intrinsic::x86_sse2_psll_q;
3516 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3517 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3518 : Intrinsic::x86_sse2_psll_w;
3519 else
3520 llvm_unreachable("Unexpected size");
3521 } else if (Name.ends_with(".256")) {
3522 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3523 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3524 : Intrinsic::x86_avx2_psll_d;
3525 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3526 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3527 : Intrinsic::x86_avx2_psll_q;
3528 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3529 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3530 : Intrinsic::x86_avx2_psll_w;
3531 else
3532 llvm_unreachable("Unexpected size");
3533 } else {
3534 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3535 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3536 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3537 : Intrinsic::x86_avx512_psll_d_512;
3538 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3539 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3540 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3541 : Intrinsic::x86_avx512_psll_q_512;
3542 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3543 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3544 : Intrinsic::x86_avx512_psll_w_512;
3545 else
3546 llvm_unreachable("Unexpected size");
3547 }
3548
3549 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3550 } else if (Name.starts_with("avx512.mask.psrl")) {
3551 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3552 bool IsVariable = Name[16] == 'v';
3553 char Size = Name[16] == '.' ? Name[17]
3554 : Name[17] == '.' ? Name[18]
3555 : Name[18] == '.' ? Name[19]
3556 : Name[20];
3557
3558 Intrinsic::ID IID;
3559 if (IsVariable && Name[17] != '.') {
3560 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3561 IID = Intrinsic::x86_avx2_psrlv_q;
3562 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3563 IID = Intrinsic::x86_avx2_psrlv_q_256;
3564 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3565 IID = Intrinsic::x86_avx2_psrlv_d;
3566 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3567 IID = Intrinsic::x86_avx2_psrlv_d_256;
3568 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3569 IID = Intrinsic::x86_avx512_psrlv_w_128;
3570 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3571 IID = Intrinsic::x86_avx512_psrlv_w_256;
3572 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3573 IID = Intrinsic::x86_avx512_psrlv_w_512;
3574 else
3575 llvm_unreachable("Unexpected size");
3576 } else if (Name.ends_with(".128")) {
3577 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3578 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3579 : Intrinsic::x86_sse2_psrl_d;
3580 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3581 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3582 : Intrinsic::x86_sse2_psrl_q;
3583 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3584 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3585 : Intrinsic::x86_sse2_psrl_w;
3586 else
3587 llvm_unreachable("Unexpected size");
3588 } else if (Name.ends_with(".256")) {
3589 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3590 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3591 : Intrinsic::x86_avx2_psrl_d;
3592 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3593 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3594 : Intrinsic::x86_avx2_psrl_q;
3595 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3596 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3597 : Intrinsic::x86_avx2_psrl_w;
3598 else
3599 llvm_unreachable("Unexpected size");
3600 } else {
3601 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3602 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3603 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3604 : Intrinsic::x86_avx512_psrl_d_512;
3605 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3606 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3607 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3608 : Intrinsic::x86_avx512_psrl_q_512;
3609 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3610 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3611 : Intrinsic::x86_avx512_psrl_w_512;
3612 else
3613 llvm_unreachable("Unexpected size");
3614 }
3615
3616 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3617 } else if (Name.starts_with("avx512.mask.psra")) {
3618 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3619 bool IsVariable = Name[16] == 'v';
3620 char Size = Name[16] == '.' ? Name[17]
3621 : Name[17] == '.' ? Name[18]
3622 : Name[18] == '.' ? Name[19]
3623 : Name[20];
3624
3625 Intrinsic::ID IID;
3626 if (IsVariable && Name[17] != '.') {
3627 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3628 IID = Intrinsic::x86_avx2_psrav_d;
3629 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3630 IID = Intrinsic::x86_avx2_psrav_d_256;
3631 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3632 IID = Intrinsic::x86_avx512_psrav_w_128;
3633 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3634 IID = Intrinsic::x86_avx512_psrav_w_256;
3635 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3636 IID = Intrinsic::x86_avx512_psrav_w_512;
3637 else
3638 llvm_unreachable("Unexpected size");
3639 } else if (Name.ends_with(".128")) {
3640 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3641 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3642 : Intrinsic::x86_sse2_psra_d;
3643 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3644 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3645 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3646 : Intrinsic::x86_avx512_psra_q_128;
3647 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3648 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3649 : Intrinsic::x86_sse2_psra_w;
3650 else
3651 llvm_unreachable("Unexpected size");
3652 } else if (Name.ends_with(".256")) {
3653 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3654 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3655 : Intrinsic::x86_avx2_psra_d;
3656 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3657 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3658 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3659 : Intrinsic::x86_avx512_psra_q_256;
3660 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3661 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3662 : Intrinsic::x86_avx2_psra_w;
3663 else
3664 llvm_unreachable("Unexpected size");
3665 } else {
3666 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3667 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3668 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3669 : Intrinsic::x86_avx512_psra_d_512;
3670 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3671 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3672 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3673 : Intrinsic::x86_avx512_psra_q_512;
3674 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3675 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3676 : Intrinsic::x86_avx512_psra_w_512;
3677 else
3678 llvm_unreachable("Unexpected size");
3679 }
3680
3681 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3682 } else if (Name.starts_with("avx512.mask.move.s")) {
3683 Rep = upgradeMaskedMove(Builder, *CI);
3684 } else if (Name.starts_with("avx512.cvtmask2")) {
3685 Rep = upgradeMaskToInt(Builder, *CI);
3686 } else if (Name.ends_with(".movntdqa")) {
3687 MDNode *Node = MDNode::get(
3688 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3689
3690 Value *Ptr = CI->getArgOperand(0);
3691
3692 // Convert the type of the pointer to a pointer to the stored type.
3693 Value *BC = Builder.CreateBitCast(
3694 Ptr, PointerType::getUnqual(CI->getType()), "cast");
3695 LoadInst *LI = Builder.CreateAlignedLoad(
3696 CI->getType(), BC,
3697 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3698 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3699 Rep = LI;
3700 } else if (Name.starts_with("fma.vfmadd.") ||
3701 Name.starts_with("fma.vfmsub.") ||
3702 Name.starts_with("fma.vfnmadd.") ||
3703 Name.starts_with("fma.vfnmsub.")) {
3704 bool NegMul = Name[6] == 'n';
3705 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3706 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
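// Illustrative example: "fma.vfnmadd.ps" sets NegMul and expands to
// fma(-a, b, c); "fma.vfmsub.ss" sets NegAcc and IsScalar, giving
// fma(a, b, -c) on lane 0.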
3707
3708 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3709 CI->getArgOperand(2)};
3710
3711 if (IsScalar) {
3712 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3713 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3714 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3715 }
3716
3717 if (NegMul && !IsScalar)
3718 Ops[0] = Builder.CreateFNeg(Ops[0]);
3719 if (NegMul && IsScalar)
3720 Ops[1] = Builder.CreateFNeg(Ops[1]);
3721 if (NegAcc)
3722 Ops[2] = Builder.CreateFNeg(Ops[2]);
3723
3724 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3725
3726 if (IsScalar)
3727 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3728 } else if (Name.starts_with("fma4.vfmadd.s")) {
3729 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3730 CI->getArgOperand(2)};
3731
3732 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3733 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3734 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3735
3736 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3737
3738 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3739 Rep, (uint64_t)0);
3740 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3741 Name.starts_with("avx512.maskz.vfmadd.s") ||
3742 Name.starts_with("avx512.mask3.vfmadd.s") ||
3743 Name.starts_with("avx512.mask3.vfmsub.s") ||
3744 Name.starts_with("avx512.mask3.vfnmsub.s")) {
3745 bool IsMask3 = Name[11] == '3';
3746 bool IsMaskZ = Name[11] == 'z';
3747 // Drop the "avx512.mask." to make it easier.
3748 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3749 bool NegMul = Name[2] == 'n';
3750 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3751
3752 Value *A = CI->getArgOperand(0);
3753 Value *B = CI->getArgOperand(1);
3754 Value *C = CI->getArgOperand(2);
3755
3756 if (NegMul && (IsMask3 || IsMaskZ))
3757 A = Builder.CreateFNeg(A);
3758 if (NegMul && !(IsMask3 || IsMaskZ))
3759 B = Builder.CreateFNeg(B);
3760 if (NegAcc)
3761 C = Builder.CreateFNeg(C);
3762
3763 A = Builder.CreateExtractElement(A, (uint64_t)0);
3764 B = Builder.CreateExtractElement(B, (uint64_t)0);
3765 C = Builder.CreateExtractElement(C, (uint64_t)0);
3766
3767 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3768 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3769 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3770
3771 Intrinsic::ID IID;
3772 if (Name.back() == 'd')
3773 IID = Intrinsic::x86_avx512_vfmadd_f64;
3774 else
3775 IID = Intrinsic::x86_avx512_vfmadd_f32;
3776 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3777 } else {
3778 Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C});
3779 }
3780
3781 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
3782 : IsMask3 ? C
3783 : A;
3784
3785 // For Mask3 with NegAcc, we need to create a new extractelement that
3786 // avoids the negation above.
3787 if (NegAcc && IsMask3)
3788 PassThru =
3789 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3790
3791 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3792 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3793 (uint64_t)0);
3794 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3795 Name.starts_with("avx512.mask.vfnmadd.p") ||
3796 Name.starts_with("avx512.mask.vfnmsub.p") ||
3797 Name.starts_with("avx512.mask3.vfmadd.p") ||
3798 Name.starts_with("avx512.mask3.vfmsub.p") ||
3799 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3800 Name.starts_with("avx512.maskz.vfmadd.p")) {
3801 bool IsMask3 = Name[11] == '3';
3802 bool IsMaskZ = Name[11] == 'z';
3803 // Drop the "avx512.mask." to make it easier.
3804 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3805 bool NegMul = Name[2] == 'n';
3806 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3807
3808 Value *A = CI->getArgOperand(0);
3809 Value *B = CI->getArgOperand(1);
3810 Value *C = CI->getArgOperand(2);
3811
3812 if (NegMul && (IsMask3 || IsMaskZ))
3813 A = Builder.CreateFNeg(A);
3814 if (NegMul && !(IsMask3 || IsMaskZ))
3815 B = Builder.CreateFNeg(B);
3816 if (NegAcc)
3817 C = Builder.CreateFNeg(C);
3818
3819 if (CI->arg_size() == 5 &&
3820 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3821 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3822 Intrinsic::ID IID;
3823 // Check the character before ".512" in the string.
3824 if (Name[Name.size() - 5] == 's')
3825 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3826 else
3827 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3828
3829 Rep = Builder.CreateIntrinsic(IID, {}, {A, B, C, CI->getArgOperand(4)});
3830 } else {
3831 Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C});
3832 }
3833
3834 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3835 : IsMask3 ? CI->getArgOperand(2)
3836 : CI->getArgOperand(0);
3837
3838 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3839 } else if (Name.starts_with("fma.vfmsubadd.p")) {
3840 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3841 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3842 Intrinsic::ID IID;
3843 if (VecWidth == 128 && EltWidth == 32)
3844 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3845 else if (VecWidth == 256 && EltWidth == 32)
3846 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3847 else if (VecWidth == 128 && EltWidth == 64)
3848 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3849 else if (VecWidth == 256 && EltWidth == 64)
3850 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3851 else
3852 llvm_unreachable("Unexpected intrinsic");
3853
3854 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3855 CI->getArgOperand(2)};
3856 Ops[2] = Builder.CreateFNeg(Ops[2]);
3857 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3858 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3859 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3860 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3861 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
3862 bool IsMask3 = Name[11] == '3';
3863 bool IsMaskZ = Name[11] == 'z';
3864 // Drop the "avx512.mask." to make it easier.
3865 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3866 bool IsSubAdd = Name[3] == 's';
3867 if (CI->arg_size() == 5) {
3868 Intrinsic::ID IID;
3869 // Check the character before ".512" in the string.
3870 if (Name[Name.size() - 5] == 's')
3871 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3872 else
3873 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3874
3875 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3876 CI->getArgOperand(2), CI->getArgOperand(4)};
3877 if (IsSubAdd)
3878 Ops[2] = Builder.CreateFNeg(Ops[2]);
3879
3880 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3881 } else {
3882 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3883
3884 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3885 CI->getArgOperand(2)};
3886
3887 Function *FMA = Intrinsic::getOrInsertDeclaration(
3888 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
3889 Value *Odd = Builder.CreateCall(FMA, Ops);
3890 Ops[2] = Builder.CreateFNeg(Ops[2]);
3891 Value *Even = Builder.CreateCall(FMA, Ops);
3892
3893 if (IsSubAdd)
3894 std::swap(Even, Odd);
3895
3896 SmallVector<int, 32> Idxs(NumElts);
3897 for (int i = 0; i != NumElts; ++i)
3898 Idxs[i] = i + (i % 2) * NumElts;
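// Illustrative example: for v4f32 the mask is <0,5,2,7>, so even result
// lanes take Even = fma(a, b, -c) and odd lanes take Odd = fma(a, b, c),
// matching the fmaddsub pattern of subtract-on-even, add-on-odd.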
3899
3900 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3901 }
3902
3903 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3904 : IsMask3 ? CI->getArgOperand(2)
3905 : CI->getArgOperand(0);
3906
3907 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3908 } else if (Name.starts_with("avx512.mask.pternlog.") ||
3909 Name.starts_with("avx512.maskz.pternlog.")) {
3910 bool ZeroMask = Name[11] == 'z';
3911 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3912 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3913 Intrinsic::ID IID;
3914 if (VecWidth == 128 && EltWidth == 32)
3915 IID = Intrinsic::x86_avx512_pternlog_d_128;
3916 else if (VecWidth == 256 && EltWidth == 32)
3917 IID = Intrinsic::x86_avx512_pternlog_d_256;
3918 else if (VecWidth == 512 && EltWidth == 32)
3919 IID = Intrinsic::x86_avx512_pternlog_d_512;
3920 else if (VecWidth == 128 && EltWidth == 64)
3921 IID = Intrinsic::x86_avx512_pternlog_q_128;
3922 else if (VecWidth == 256 && EltWidth == 64)
3923 IID = Intrinsic::x86_avx512_pternlog_q_256;
3924 else if (VecWidth == 512 && EltWidth == 64)
3925 IID = Intrinsic::x86_avx512_pternlog_q_512;
3926 else
3927 llvm_unreachable("Unexpected intrinsic");
3928
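// The trailing i8 operand is a three-input truth table applied bitwise;
// for example, 0xCA encodes the select A ? B : C.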
3929 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3930 CI->getArgOperand(2), CI->getArgOperand(3)};
3931 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3932 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3933 : CI->getArgOperand(0);
3934 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3935 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
3936 Name.starts_with("avx512.maskz.vpmadd52")) {
3937 bool ZeroMask = Name[11] == 'z';
3938 bool High = Name[20] == 'h' || Name[21] == 'h';
3939 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3940 Intrinsic::ID IID;
3941 if (VecWidth == 128 && !High)
3942 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3943 else if (VecWidth == 256 && !High)
3944 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3945 else if (VecWidth == 512 && !High)
3946 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3947 else if (VecWidth == 128 && High)
3948 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3949 else if (VecWidth == 256 && High)
3950 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3951 else if (VecWidth == 512 && High)
3952 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3953 else
3954 llvm_unreachable("Unexpected intrinsic");
3955
3956 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3957 CI->getArgOperand(2)};
3958 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3959 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3960 : CI->getArgOperand(0);
3961 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3962 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
3963 Name.starts_with("avx512.mask.vpermt2var.") ||
3964 Name.starts_with("avx512.maskz.vpermt2var.")) {
3965 bool ZeroMask = Name[11] == 'z';
3966 bool IndexForm = Name[17] == 'i';
3967 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3968 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
3969 Name.starts_with("avx512.maskz.vpdpbusd.") ||
3970 Name.starts_with("avx512.mask.vpdpbusds.") ||
3971 Name.starts_with("avx512.maskz.vpdpbusds.")) {
3972 bool ZeroMask = Name[11] == 'z';
3973 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3974 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3975 Intrinsic::ID IID;
3976 if (VecWidth == 128 && !IsSaturating)
3977 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3978 else if (VecWidth == 256 && !IsSaturating)
3979 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3980 else if (VecWidth == 512 && !IsSaturating)
3981 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3982 else if (VecWidth == 128 && IsSaturating)
3983 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3984 else if (VecWidth == 256 && IsSaturating)
3985 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3986 else if (VecWidth == 512 && IsSaturating)
3987 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3988 else
3989 llvm_unreachable("Unexpected intrinsic");
3990
3991 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3992 CI->getArgOperand(2)};
3993 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3994 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3995 : CI->getArgOperand(0);
3996 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3997 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
3998 Name.starts_with("avx512.maskz.vpdpwssd.") ||
3999 Name.starts_with("avx512.mask.vpdpwssds.") ||
4000 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4001 bool ZeroMask = Name[11] == 'z';
4002 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4003 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4004 Intrinsic::ID IID;
4005 if (VecWidth == 128 && !IsSaturating)
4006 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4007 else if (VecWidth == 256 && !IsSaturating)
4008 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4009 else if (VecWidth == 512 && !IsSaturating)
4010 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4011 else if (VecWidth == 128 && IsSaturating)
4012 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4013 else if (VecWidth == 256 && IsSaturating)
4014 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4015 else if (VecWidth == 512 && IsSaturating)
4016 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4017 else
4018 llvm_unreachable("Unexpected intrinsic");
4019
4020 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4021 CI->getArgOperand(2)};
4022 Rep = Builder.CreateIntrinsic(IID, {}, Args);
4023 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4024 : CI->getArgOperand(0);
4025 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4026 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4027 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4028 Name == "subborrow.u32" || Name == "subborrow.u64") {
4029 Intrinsic::ID IID;
4030 if (Name[0] == 'a' && Name.back() == '2')
4031 IID = Intrinsic::x86_addcarry_32;
4032 else if (Name[0] == 'a' && Name.back() == '4')
4033 IID = Intrinsic::x86_addcarry_64;
4034 else if (Name[0] == 's' && Name.back() == '2')
4035 IID = Intrinsic::x86_subborrow_32;
4036 else if (Name[0] == 's' && Name.back() == '4')
4037 IID = Intrinsic::x86_subborrow_64;
4038 else
4039 llvm_unreachable("Unexpected intrinsic");
4040
4041 // Make a call with 3 operands.
4042 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4043 CI->getArgOperand(2)};
4044 Value *NewCall = Builder.CreateIntrinsic(IID, {}, Args);
4045
4046 // Extract the second result and store it.
4047 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4048 // Cast the pointer to the right type.
4049 Value *Ptr = Builder.CreateBitCast(
4050 CI->getArgOperand(3), llvm::PointerType::getUnqual(Data->getType()));
4051 Builder.CreateAlignedStore(Data, Ptr, Align(1));
4052 // Replace the original call result with the first result of the new call.
4053 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4054
4055 CI->replaceAllUsesWith(CF);
4056 Rep = nullptr;
4057 } else if (Name.starts_with("avx512.mask.") &&
4058 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4059 // Rep will be updated by the call in the condition.
4060 }
4061
4062 return Rep;
4063}
4064
4065 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4066 Function *F, IRBuilder<> &Builder) {
4067 Intrinsic::ID NewID =
4068 StringSwitch<Intrinsic::ID>(Name)
4069 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4070 .Case("sve.fcvtnt.bf16f32", Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4071 .Default(Intrinsic::not_intrinsic);
4072 if (NewID == Intrinsic::not_intrinsic)
4073 llvm_unreachable("Unhandled Intrinsic!");
4074
4075 SmallVector<Value *, 3> Args(CI->args());
4076
4077 // The original intrinsics incorrectly used a predicate based on the smallest
4078 // element type rather than the largest.
4079 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4080 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4081
4082 if (Args[1]->getType() != BadPredTy)
4083 llvm_unreachable("Unexpected predicate type!");
4084
4085 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4086 BadPredTy, Args[1]);
4087 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
4088 GoodPredTy, Args[1]);
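// The net effect: the incoming <vscale x 8 x i1> predicate is widened to
// the common svbool type and reinterpreted as <vscale x 4 x i1>, matching
// the four f32 elements per 128-bit granule.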
4089
4090 return Builder.CreateIntrinsic(NewID, {}, Args, /*FMFSource=*/nullptr,
4091 CI->getName());
4092}
4093
4094 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4095 IRBuilder<> &Builder) {
4096 if (Name == "mve.vctp64.old") {
4097 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4098 // correct type.
4099 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4100 CI->getArgOperand(0),
4101 /*FMFSource=*/nullptr, CI->getName());
4102 Value *C1 = Builder.CreateIntrinsic(
4103 Intrinsic::arm_mve_pred_v2i,
4104 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4105 return Builder.CreateIntrinsic(
4106 Intrinsic::arm_mve_pred_i2v,
4107 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4108 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4109 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4110 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4111 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4112 Name ==
4113 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4114 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4115 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4116 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4117 Name ==
4118 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4119 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4120 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4121 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4122 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4123