LLVM 23.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>
59
60using namespace llvm;
61
62static cl::opt<bool>
63 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
64 cl::desc("Disable autoupgrade of debug info"));
65
66static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
67
68// Report a fatal error along with the
69// Call Instruction which caused the error
70[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
71 CallBase *CI) {
72 CI->print(llvm::errs());
73 llvm::errs() << "\n";
75}
76
77// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
78// changed their type from v4f32 to v2i64.
80 Function *&NewFn) {
81 // Check whether this is an old version of the function, which received
82 // v4f32 arguments.
83 Type *Arg0Type = F->getFunctionType()->getParamType(0);
84 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
85 return false;
86
87 // Yes, it's old, replace it with new version.
88 rename(F);
89 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
90 return true;
91}
92
93// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
94// arguments have changed their type from i32 to i8.
96 Function *&NewFn) {
97 // Check that the last argument is an i32.
98 Type *LastArgType = F->getFunctionType()->getParamType(
99 F->getFunctionType()->getNumParams() - 1);
100 if (!LastArgType->isIntegerTy(32))
101 return false;
102
103 // Move this function aside and map down.
104 rename(F);
105 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
106 return true;
107}
108
109// Upgrade the declaration of fp compare intrinsics that change return type
110// from scalar to vXi1 mask.
112 Function *&NewFn) {
113 // Check if the return type is a vector.
114 if (F->getReturnType()->isVectorTy())
115 return false;
116
117 rename(F);
118 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
119 return true;
120}
121
122// Upgrade the declaration of multiply and add bytes intrinsics whose input
123// arguments' types have changed from vectors of i32 to vectors of i8
125 Function *&NewFn) {
126 // check if input argument type is a vector of i8
127 Type *Arg1Type = F->getFunctionType()->getParamType(1);
128 Type *Arg2Type = F->getFunctionType()->getParamType(2);
129 if (Arg1Type->isVectorTy() &&
130 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
131 Arg2Type->isVectorTy() &&
132 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
133 return false;
134
135 rename(F);
136 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
137 return true;
138}
139
140// Upgrade the declaration of multipy and add words intrinsics whose input
141// arguments' types have changed to vectors of i32 to vectors of i16
143 Function *&NewFn) {
144 // check if input argument type is a vector of i16
145 Type *Arg1Type = F->getFunctionType()->getParamType(1);
146 Type *Arg2Type = F->getFunctionType()->getParamType(2);
147 if (Arg1Type->isVectorTy() &&
148 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
149 Arg2Type->isVectorTy() &&
150 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
151 return false;
152
153 rename(F);
154 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
155 return true;
156}
157
159 Function *&NewFn) {
160 if (F->getReturnType()->getScalarType()->isBFloatTy())
161 return false;
162
163 rename(F);
164 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
165 return true;
166}
167
169 Function *&NewFn) {
170 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
171 return false;
172
173 rename(F);
174 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
175 return true;
176}
177
179 // All of the intrinsics matches below should be marked with which llvm
180 // version started autoupgrading them. At some point in the future we would
181 // like to use this information to remove upgrade code for some older
182 // intrinsics. It is currently undecided how we will determine that future
183 // point.
184 if (Name.consume_front("avx."))
185 return (Name.starts_with("blend.p") || // Added in 3.7
186 Name == "cvt.ps2.pd.256" || // Added in 3.9
187 Name == "cvtdq2.pd.256" || // Added in 3.9
188 Name == "cvtdq2.ps.256" || // Added in 7.0
189 Name.starts_with("movnt.") || // Added in 3.2
190 Name.starts_with("sqrt.p") || // Added in 7.0
191 Name.starts_with("storeu.") || // Added in 3.9
192 Name.starts_with("vbroadcast.s") || // Added in 3.5
193 Name.starts_with("vbroadcastf128") || // Added in 4.0
194 Name.starts_with("vextractf128.") || // Added in 3.7
195 Name.starts_with("vinsertf128.") || // Added in 3.7
196 Name.starts_with("vperm2f128.") || // Added in 6.0
197 Name.starts_with("vpermil.")); // Added in 3.1
198
199 if (Name.consume_front("avx2."))
200 return (Name == "movntdqa" || // Added in 5.0
201 Name.starts_with("pabs.") || // Added in 6.0
202 Name.starts_with("padds.") || // Added in 8.0
203 Name.starts_with("paddus.") || // Added in 8.0
204 Name.starts_with("pblendd.") || // Added in 3.7
205 Name == "pblendw" || // Added in 3.7
206 Name.starts_with("pbroadcast") || // Added in 3.8
207 Name.starts_with("pcmpeq.") || // Added in 3.1
208 Name.starts_with("pcmpgt.") || // Added in 3.1
209 Name.starts_with("pmax") || // Added in 3.9
210 Name.starts_with("pmin") || // Added in 3.9
211 Name.starts_with("pmovsx") || // Added in 3.9
212 Name.starts_with("pmovzx") || // Added in 3.9
213 Name == "pmul.dq" || // Added in 7.0
214 Name == "pmulu.dq" || // Added in 7.0
215 Name.starts_with("psll.dq") || // Added in 3.7
216 Name.starts_with("psrl.dq") || // Added in 3.7
217 Name.starts_with("psubs.") || // Added in 8.0
218 Name.starts_with("psubus.") || // Added in 8.0
219 Name.starts_with("vbroadcast") || // Added in 3.8
220 Name == "vbroadcasti128" || // Added in 3.7
221 Name == "vextracti128" || // Added in 3.7
222 Name == "vinserti128" || // Added in 3.7
223 Name == "vperm2i128"); // Added in 6.0
224
225 if (Name.consume_front("avx512.")) {
226 if (Name.consume_front("mask."))
227 // 'avx512.mask.*'
228 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
229 Name.starts_with("and.") || // Added in 3.9
230 Name.starts_with("andn.") || // Added in 3.9
231 Name.starts_with("broadcast.s") || // Added in 3.9
232 Name.starts_with("broadcastf32x4.") || // Added in 6.0
233 Name.starts_with("broadcastf32x8.") || // Added in 6.0
234 Name.starts_with("broadcastf64x2.") || // Added in 6.0
235 Name.starts_with("broadcastf64x4.") || // Added in 6.0
236 Name.starts_with("broadcasti32x4.") || // Added in 6.0
237 Name.starts_with("broadcasti32x8.") || // Added in 6.0
238 Name.starts_with("broadcasti64x2.") || // Added in 6.0
239 Name.starts_with("broadcasti64x4.") || // Added in 6.0
240 Name.starts_with("cmp.b") || // Added in 5.0
241 Name.starts_with("cmp.d") || // Added in 5.0
242 Name.starts_with("cmp.q") || // Added in 5.0
243 Name.starts_with("cmp.w") || // Added in 5.0
244 Name.starts_with("compress.b") || // Added in 9.0
245 Name.starts_with("compress.d") || // Added in 9.0
246 Name.starts_with("compress.p") || // Added in 9.0
247 Name.starts_with("compress.q") || // Added in 9.0
248 Name.starts_with("compress.store.") || // Added in 7.0
249 Name.starts_with("compress.w") || // Added in 9.0
250 Name.starts_with("conflict.") || // Added in 9.0
251 Name.starts_with("cvtdq2pd.") || // Added in 4.0
252 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
253 Name == "cvtpd2dq.256" || // Added in 7.0
254 Name == "cvtpd2ps.256" || // Added in 7.0
255 Name == "cvtps2pd.128" || // Added in 7.0
256 Name == "cvtps2pd.256" || // Added in 7.0
257 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
258 Name == "cvtqq2ps.256" || // Added in 9.0
259 Name == "cvtqq2ps.512" || // Added in 9.0
260 Name == "cvttpd2dq.256" || // Added in 7.0
261 Name == "cvttps2dq.128" || // Added in 7.0
262 Name == "cvttps2dq.256" || // Added in 7.0
263 Name.starts_with("cvtudq2pd.") || // Added in 4.0
264 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
265 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
266 Name == "cvtuqq2ps.256" || // Added in 9.0
267 Name == "cvtuqq2ps.512" || // Added in 9.0
268 Name.starts_with("dbpsadbw.") || // Added in 7.0
269 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
270 Name.starts_with("expand.b") || // Added in 9.0
271 Name.starts_with("expand.d") || // Added in 9.0
272 Name.starts_with("expand.load.") || // Added in 7.0
273 Name.starts_with("expand.p") || // Added in 9.0
274 Name.starts_with("expand.q") || // Added in 9.0
275 Name.starts_with("expand.w") || // Added in 9.0
276 Name.starts_with("fpclass.p") || // Added in 7.0
277 Name.starts_with("insert") || // Added in 4.0
278 Name.starts_with("load.") || // Added in 3.9
279 Name.starts_with("loadu.") || // Added in 3.9
280 Name.starts_with("lzcnt.") || // Added in 5.0
281 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
282 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
283 Name.starts_with("movddup") || // Added in 3.9
284 Name.starts_with("move.s") || // Added in 4.0
285 Name.starts_with("movshdup") || // Added in 3.9
286 Name.starts_with("movsldup") || // Added in 3.9
287 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
288 Name.starts_with("or.") || // Added in 3.9
289 Name.starts_with("pabs.") || // Added in 6.0
290 Name.starts_with("packssdw.") || // Added in 5.0
291 Name.starts_with("packsswb.") || // Added in 5.0
292 Name.starts_with("packusdw.") || // Added in 5.0
293 Name.starts_with("packuswb.") || // Added in 5.0
294 Name.starts_with("padd.") || // Added in 4.0
295 Name.starts_with("padds.") || // Added in 8.0
296 Name.starts_with("paddus.") || // Added in 8.0
297 Name.starts_with("palignr.") || // Added in 3.9
298 Name.starts_with("pand.") || // Added in 3.9
299 Name.starts_with("pandn.") || // Added in 3.9
300 Name.starts_with("pavg") || // Added in 6.0
301 Name.starts_with("pbroadcast") || // Added in 6.0
302 Name.starts_with("pcmpeq.") || // Added in 3.9
303 Name.starts_with("pcmpgt.") || // Added in 3.9
304 Name.starts_with("perm.df.") || // Added in 3.9
305 Name.starts_with("perm.di.") || // Added in 3.9
306 Name.starts_with("permvar.") || // Added in 7.0
307 Name.starts_with("pmaddubs.w.") || // Added in 7.0
308 Name.starts_with("pmaddw.d.") || // Added in 7.0
309 Name.starts_with("pmax") || // Added in 4.0
310 Name.starts_with("pmin") || // Added in 4.0
311 Name == "pmov.qd.256" || // Added in 9.0
312 Name == "pmov.qd.512" || // Added in 9.0
313 Name == "pmov.wb.256" || // Added in 9.0
314 Name == "pmov.wb.512" || // Added in 9.0
315 Name.starts_with("pmovsx") || // Added in 4.0
316 Name.starts_with("pmovzx") || // Added in 4.0
317 Name.starts_with("pmul.dq.") || // Added in 4.0
318 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
319 Name.starts_with("pmulh.w.") || // Added in 7.0
320 Name.starts_with("pmulhu.w.") || // Added in 7.0
321 Name.starts_with("pmull.") || // Added in 4.0
322 Name.starts_with("pmultishift.qb.") || // Added in 8.0
323 Name.starts_with("pmulu.dq.") || // Added in 4.0
324 Name.starts_with("por.") || // Added in 3.9
325 Name.starts_with("prol.") || // Added in 8.0
326 Name.starts_with("prolv.") || // Added in 8.0
327 Name.starts_with("pror.") || // Added in 8.0
328 Name.starts_with("prorv.") || // Added in 8.0
329 Name.starts_with("pshuf.b.") || // Added in 4.0
330 Name.starts_with("pshuf.d.") || // Added in 3.9
331 Name.starts_with("pshufh.w.") || // Added in 3.9
332 Name.starts_with("pshufl.w.") || // Added in 3.9
333 Name.starts_with("psll.d") || // Added in 4.0
334 Name.starts_with("psll.q") || // Added in 4.0
335 Name.starts_with("psll.w") || // Added in 4.0
336 Name.starts_with("pslli") || // Added in 4.0
337 Name.starts_with("psllv") || // Added in 4.0
338 Name.starts_with("psra.d") || // Added in 4.0
339 Name.starts_with("psra.q") || // Added in 4.0
340 Name.starts_with("psra.w") || // Added in 4.0
341 Name.starts_with("psrai") || // Added in 4.0
342 Name.starts_with("psrav") || // Added in 4.0
343 Name.starts_with("psrl.d") || // Added in 4.0
344 Name.starts_with("psrl.q") || // Added in 4.0
345 Name.starts_with("psrl.w") || // Added in 4.0
346 Name.starts_with("psrli") || // Added in 4.0
347 Name.starts_with("psrlv") || // Added in 4.0
348 Name.starts_with("psub.") || // Added in 4.0
349 Name.starts_with("psubs.") || // Added in 8.0
350 Name.starts_with("psubus.") || // Added in 8.0
351 Name.starts_with("pternlog.") || // Added in 7.0
352 Name.starts_with("punpckh") || // Added in 3.9
353 Name.starts_with("punpckl") || // Added in 3.9
354 Name.starts_with("pxor.") || // Added in 3.9
355 Name.starts_with("shuf.f") || // Added in 6.0
356 Name.starts_with("shuf.i") || // Added in 6.0
357 Name.starts_with("shuf.p") || // Added in 4.0
358 Name.starts_with("sqrt.p") || // Added in 7.0
359 Name.starts_with("store.b.") || // Added in 3.9
360 Name.starts_with("store.d.") || // Added in 3.9
361 Name.starts_with("store.p") || // Added in 3.9
362 Name.starts_with("store.q.") || // Added in 3.9
363 Name.starts_with("store.w.") || // Added in 3.9
364 Name == "store.ss" || // Added in 7.0
365 Name.starts_with("storeu.") || // Added in 3.9
366 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
367 Name.starts_with("ucmp.") || // Added in 5.0
368 Name.starts_with("unpckh.") || // Added in 3.9
369 Name.starts_with("unpckl.") || // Added in 3.9
370 Name.starts_with("valign.") || // Added in 4.0
371 Name == "vcvtph2ps.128" || // Added in 11.0
372 Name == "vcvtph2ps.256" || // Added in 11.0
373 Name.starts_with("vextract") || // Added in 4.0
374 Name.starts_with("vfmadd.") || // Added in 7.0
375 Name.starts_with("vfmaddsub.") || // Added in 7.0
376 Name.starts_with("vfnmadd.") || // Added in 7.0
377 Name.starts_with("vfnmsub.") || // Added in 7.0
378 Name.starts_with("vpdpbusd.") || // Added in 7.0
379 Name.starts_with("vpdpbusds.") || // Added in 7.0
380 Name.starts_with("vpdpwssd.") || // Added in 7.0
381 Name.starts_with("vpdpwssds.") || // Added in 7.0
382 Name.starts_with("vpermi2var.") || // Added in 7.0
383 Name.starts_with("vpermil.p") || // Added in 3.9
384 Name.starts_with("vpermilvar.") || // Added in 4.0
385 Name.starts_with("vpermt2var.") || // Added in 7.0
386 Name.starts_with("vpmadd52") || // Added in 7.0
387 Name.starts_with("vpshld.") || // Added in 7.0
388 Name.starts_with("vpshldv.") || // Added in 8.0
389 Name.starts_with("vpshrd.") || // Added in 7.0
390 Name.starts_with("vpshrdv.") || // Added in 8.0
391 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
392 Name.starts_with("xor.")); // Added in 3.9
393
394 if (Name.consume_front("mask3."))
395 // 'avx512.mask3.*'
396 return (Name.starts_with("vfmadd.") || // Added in 7.0
397 Name.starts_with("vfmaddsub.") || // Added in 7.0
398 Name.starts_with("vfmsub.") || // Added in 7.0
399 Name.starts_with("vfmsubadd.") || // Added in 7.0
400 Name.starts_with("vfnmsub.")); // Added in 7.0
401
402 if (Name.consume_front("maskz."))
403 // 'avx512.maskz.*'
404 return (Name.starts_with("pternlog.") || // Added in 7.0
405 Name.starts_with("vfmadd.") || // Added in 7.0
406 Name.starts_with("vfmaddsub.") || // Added in 7.0
407 Name.starts_with("vpdpbusd.") || // Added in 7.0
408 Name.starts_with("vpdpbusds.") || // Added in 7.0
409 Name.starts_with("vpdpwssd.") || // Added in 7.0
410 Name.starts_with("vpdpwssds.") || // Added in 7.0
411 Name.starts_with("vpermt2var.") || // Added in 7.0
412 Name.starts_with("vpmadd52") || // Added in 7.0
413 Name.starts_with("vpshldv.") || // Added in 8.0
414 Name.starts_with("vpshrdv.")); // Added in 8.0
415
416 // 'avx512.*'
417 return (Name == "movntdqa" || // Added in 5.0
418 Name == "pmul.dq.512" || // Added in 7.0
419 Name == "pmulu.dq.512" || // Added in 7.0
420 Name.starts_with("broadcastm") || // Added in 6.0
421 Name.starts_with("cmp.p") || // Added in 12.0
422 Name.starts_with("cvtb2mask.") || // Added in 7.0
423 Name.starts_with("cvtd2mask.") || // Added in 7.0
424 Name.starts_with("cvtmask2") || // Added in 5.0
425 Name.starts_with("cvtq2mask.") || // Added in 7.0
426 Name == "cvtusi2sd" || // Added in 7.0
427 Name.starts_with("cvtw2mask.") || // Added in 7.0
428 Name == "kand.w" || // Added in 7.0
429 Name == "kandn.w" || // Added in 7.0
430 Name == "knot.w" || // Added in 7.0
431 Name == "kor.w" || // Added in 7.0
432 Name == "kortestc.w" || // Added in 7.0
433 Name == "kortestz.w" || // Added in 7.0
434 Name.starts_with("kunpck") || // added in 6.0
435 Name == "kxnor.w" || // Added in 7.0
436 Name == "kxor.w" || // Added in 7.0
437 Name.starts_with("padds.") || // Added in 8.0
438 Name.starts_with("pbroadcast") || // Added in 3.9
439 Name.starts_with("prol") || // Added in 8.0
440 Name.starts_with("pror") || // Added in 8.0
441 Name.starts_with("psll.dq") || // Added in 3.9
442 Name.starts_with("psrl.dq") || // Added in 3.9
443 Name.starts_with("psubs.") || // Added in 8.0
444 Name.starts_with("ptestm") || // Added in 6.0
445 Name.starts_with("ptestnm") || // Added in 6.0
446 Name.starts_with("storent.") || // Added in 3.9
447 Name.starts_with("vbroadcast.s") || // Added in 7.0
448 Name.starts_with("vpshld.") || // Added in 8.0
449 Name.starts_with("vpshrd.")); // Added in 8.0
450 }
451
452 if (Name.consume_front("fma."))
453 return (Name.starts_with("vfmadd.") || // Added in 7.0
454 Name.starts_with("vfmsub.") || // Added in 7.0
455 Name.starts_with("vfmsubadd.") || // Added in 7.0
456 Name.starts_with("vfnmadd.") || // Added in 7.0
457 Name.starts_with("vfnmsub.")); // Added in 7.0
458
459 if (Name.consume_front("fma4."))
460 return Name.starts_with("vfmadd.s"); // Added in 7.0
461
462 if (Name.consume_front("sse."))
463 return (Name == "add.ss" || // Added in 4.0
464 Name == "cvtsi2ss" || // Added in 7.0
465 Name == "cvtsi642ss" || // Added in 7.0
466 Name == "div.ss" || // Added in 4.0
467 Name == "mul.ss" || // Added in 4.0
468 Name.starts_with("sqrt.p") || // Added in 7.0
469 Name == "sqrt.ss" || // Added in 7.0
470 Name.starts_with("storeu.") || // Added in 3.9
471 Name == "sub.ss"); // Added in 4.0
472
473 if (Name.consume_front("sse2."))
474 return (Name == "add.sd" || // Added in 4.0
475 Name == "cvtdq2pd" || // Added in 3.9
476 Name == "cvtdq2ps" || // Added in 7.0
477 Name == "cvtps2pd" || // Added in 3.9
478 Name == "cvtsi2sd" || // Added in 7.0
479 Name == "cvtsi642sd" || // Added in 7.0
480 Name == "cvtss2sd" || // Added in 7.0
481 Name == "div.sd" || // Added in 4.0
482 Name == "mul.sd" || // Added in 4.0
483 Name.starts_with("padds.") || // Added in 8.0
484 Name.starts_with("paddus.") || // Added in 8.0
485 Name.starts_with("pcmpeq.") || // Added in 3.1
486 Name.starts_with("pcmpgt.") || // Added in 3.1
487 Name == "pmaxs.w" || // Added in 3.9
488 Name == "pmaxu.b" || // Added in 3.9
489 Name == "pmins.w" || // Added in 3.9
490 Name == "pminu.b" || // Added in 3.9
491 Name == "pmulu.dq" || // Added in 7.0
492 Name.starts_with("pshuf") || // Added in 3.9
493 Name.starts_with("psll.dq") || // Added in 3.7
494 Name.starts_with("psrl.dq") || // Added in 3.7
495 Name.starts_with("psubs.") || // Added in 8.0
496 Name.starts_with("psubus.") || // Added in 8.0
497 Name.starts_with("sqrt.p") || // Added in 7.0
498 Name == "sqrt.sd" || // Added in 7.0
499 Name == "storel.dq" || // Added in 3.9
500 Name.starts_with("storeu.") || // Added in 3.9
501 Name == "sub.sd"); // Added in 4.0
502
503 if (Name.consume_front("sse41."))
504 return (Name.starts_with("blendp") || // Added in 3.7
505 Name == "movntdqa" || // Added in 5.0
506 Name == "pblendw" || // Added in 3.7
507 Name == "pmaxsb" || // Added in 3.9
508 Name == "pmaxsd" || // Added in 3.9
509 Name == "pmaxud" || // Added in 3.9
510 Name == "pmaxuw" || // Added in 3.9
511 Name == "pminsb" || // Added in 3.9
512 Name == "pminsd" || // Added in 3.9
513 Name == "pminud" || // Added in 3.9
514 Name == "pminuw" || // Added in 3.9
515 Name.starts_with("pmovsx") || // Added in 3.8
516 Name.starts_with("pmovzx") || // Added in 3.9
517 Name == "pmuldq"); // Added in 7.0
518
519 if (Name.consume_front("sse42."))
520 return Name == "crc32.64.8"; // Added in 3.4
521
522 if (Name.consume_front("sse4a."))
523 return Name.starts_with("movnt."); // Added in 3.9
524
525 if (Name.consume_front("ssse3."))
526 return (Name == "pabs.b.128" || // Added in 6.0
527 Name == "pabs.d.128" || // Added in 6.0
528 Name == "pabs.w.128"); // Added in 6.0
529
530 if (Name.consume_front("xop."))
531 return (Name == "vpcmov" || // Added in 3.8
532 Name == "vpcmov.256" || // Added in 5.0
533 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
534 Name.starts_with("vprot")); // Added in 8.0
535
536 if (Name.consume_front("bmi."))
537 return (Name.starts_with("pdep.") || // Added in 23.0
538 Name.starts_with("pext.")); // Added in 23.0
539
540 return (Name == "addcarry.u32" || // Added in 8.0
541 Name == "addcarry.u64" || // Added in 8.0
542 Name == "addcarryx.u32" || // Added in 8.0
543 Name == "addcarryx.u64" || // Added in 8.0
544 Name == "subborrow.u32" || // Added in 8.0
545 Name == "subborrow.u64" || // Added in 8.0
546 Name.starts_with("vcvtph2ps.")); // Added in 11.0
547}
548
550 Function *&NewFn) {
551 // Only handle intrinsics that start with "x86.".
552 if (!Name.consume_front("x86."))
553 return false;
554
555 if (shouldUpgradeX86Intrinsic(F, Name)) {
556 NewFn = nullptr;
557 return true;
558 }
559
560 if (Name == "rdtscp") { // Added in 8.0
561 // If this intrinsic has 0 operands, it's the new version.
562 if (F->getFunctionType()->getNumParams() == 0)
563 return false;
564
565 rename(F);
566 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
567 Intrinsic::x86_rdtscp);
568 return true;
569 }
570
572
573 // SSE4.1 ptest functions may have an old signature.
574 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
576 .Case("c", Intrinsic::x86_sse41_ptestc)
577 .Case("z", Intrinsic::x86_sse41_ptestz)
578 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
581 return upgradePTESTIntrinsic(F, ID, NewFn);
582
583 return false;
584 }
585
586 // Several blend and other instructions with masks used the wrong number of
587 // bits.
588
589 // Added in 3.6
591 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
592 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
593 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
594 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
595 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
596 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
599 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
600
601 if (Name.consume_front("avx512.")) {
602 if (Name.consume_front("mask.cmp.")) {
603 // Added in 7.0
605 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
606 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
607 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
608 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
609 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
610 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
613 return upgradeX86MaskedFPCompare(F, ID, NewFn);
614 } else if (Name.starts_with("vpdpbusd.") ||
615 Name.starts_with("vpdpbusds.")) {
616 // Added in 21.1
618 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
619 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
620 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
621 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
622 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
623 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
626 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
627 } else if (Name.starts_with("vpdpwssd.") ||
628 Name.starts_with("vpdpwssds.")) {
629 // Added in 21.1
631 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
632 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
633 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
634 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
635 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
636 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
639 return upgradeX86MultiplyAddWords(F, ID, NewFn);
640 }
641 return false; // No other 'x86.avx512.*'.
642 }
643
644 if (Name.consume_front("avx2.")) {
645 if (Name.consume_front("vpdpb")) {
646 // Added in 21.1
648 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
649 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
650 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
651 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
652 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
653 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
654 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
655 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
656 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
657 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
658 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
659 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
662 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
663 } else if (Name.consume_front("vpdpw")) {
664 // Added in 21.1
666 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
667 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
668 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
669 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
670 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
671 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
672 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
673 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
674 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
675 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
676 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
677 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
680 return upgradeX86MultiplyAddWords(F, ID, NewFn);
681 }
682 return false; // No other 'x86.avx2.*'
683 }
684
685 if (Name.consume_front("avx10.")) {
686 if (Name.consume_front("vpdpb")) {
687 // Added in 21.1
689 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
690 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
691 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
692 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
693 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
694 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
697 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
698 } else if (Name.consume_front("vpdpw")) {
700 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
701 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
702 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
703 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
704 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
705 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
708 return upgradeX86MultiplyAddWords(F, ID, NewFn);
709 }
710 return false; // No other 'x86.avx10.*'
711 }
712
713 if (Name.consume_front("avx512bf16.")) {
714 // Added in 9.0
716 .Case("cvtne2ps2bf16.128",
717 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
718 .Case("cvtne2ps2bf16.256",
719 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
720 .Case("cvtne2ps2bf16.512",
721 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
722 .Case("mask.cvtneps2bf16.128",
723 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
724 .Case("cvtneps2bf16.256",
725 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
726 .Case("cvtneps2bf16.512",
727 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
730 return upgradeX86BF16Intrinsic(F, ID, NewFn);
731
732 // Added in 9.0
734 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
735 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
736 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
739 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
740 return false; // No other 'x86.avx512bf16.*'.
741 }
742
743 if (Name.consume_front("xop.")) {
745 if (Name.starts_with("vpermil2")) { // Added in 3.9
746 // Upgrade any XOP PERMIL2 index operand still using a float/double
747 // vector.
748 auto Idx = F->getFunctionType()->getParamType(2);
749 if (Idx->isFPOrFPVectorTy()) {
750 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
751 unsigned EltSize = Idx->getScalarSizeInBits();
752 if (EltSize == 64 && IdxSize == 128)
753 ID = Intrinsic::x86_xop_vpermil2pd;
754 else if (EltSize == 32 && IdxSize == 128)
755 ID = Intrinsic::x86_xop_vpermil2ps;
756 else if (EltSize == 64 && IdxSize == 256)
757 ID = Intrinsic::x86_xop_vpermil2pd_256;
758 else
759 ID = Intrinsic::x86_xop_vpermil2ps_256;
760 }
761 } else if (F->arg_size() == 2)
762 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
764 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
765 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
767
769 rename(F);
770 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
771 return true;
772 }
773 return false; // No other 'x86.xop.*'
774 }
775
776 if (Name == "seh.recoverfp") {
777 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
778 Intrinsic::eh_recoverfp);
779 return true;
780 }
781
782 return false;
783}
784
785// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
786// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
788 StringRef Name,
789 Function *&NewFn) {
790 if (Name.starts_with("rbit")) {
791 // '(arm|aarch64).rbit'.
793 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
794 return true;
795 }
796
797 if (Name == "thread.pointer") {
798 // '(arm|aarch64).thread.pointer'.
800 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
801 return true;
802 }
803
804 bool Neon = Name.consume_front("neon.");
805 if (Neon) {
806 // '(arm|aarch64).neon.*'.
807 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
808 // v16i8 respectively.
809 if (Name.consume_front("bfdot.")) {
810 // (arm|aarch64).neon.bfdot.*'.
813 .Cases({"v2f32.v8i8", "v4f32.v16i8"},
814 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
815 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
818 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
819 assert((OperandWidth == 64 || OperandWidth == 128) &&
820 "Unexpected operand width");
821 LLVMContext &Ctx = F->getParent()->getContext();
822 std::array<Type *, 2> Tys{
823 {F->getReturnType(),
824 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
825 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
826 return true;
827 }
828 return false; // No other '(arm|aarch64).neon.bfdot.*'.
829 }
830
831 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
832 // anymore and accept v8bf16 instead of v16i8.
833 if (Name.consume_front("bfm")) {
834 // (arm|aarch64).neon.bfm*'.
835 if (Name.consume_back(".v4f32.v16i8")) {
836 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
839 .Case("mla",
840 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
841 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
842 .Case("lalb",
843 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
844 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
845 .Case("lalt",
846 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
847 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
850 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
851 return true;
852 }
853 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
854 }
855 return false; // No other '(arm|aarch64).neon.bfm*.
856 }
857 // Continue on to Aarch64 Neon or Arm Neon.
858 }
859 // Continue on to Arm or Aarch64.
860
861 if (IsArm) {
862 // 'arm.*'.
863 if (Neon) {
864 // 'arm.neon.*'.
866 .StartsWith("vclz.", Intrinsic::ctlz)
867 .StartsWith("vcnt.", Intrinsic::ctpop)
868 .StartsWith("vqadds.", Intrinsic::sadd_sat)
869 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
870 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
871 .StartsWith("vqsubu.", Intrinsic::usub_sat)
872 .StartsWith("vrinta.", Intrinsic::round)
873 .StartsWith("vrintn.", Intrinsic::roundeven)
874 .StartsWith("vrintm.", Intrinsic::floor)
875 .StartsWith("vrintp.", Intrinsic::ceil)
876 .StartsWith("vrintx.", Intrinsic::rint)
877 .StartsWith("vrintz.", Intrinsic::trunc)
880 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
881 F->arg_begin()->getType());
882 return true;
883 }
884
885 if (Name.consume_front("vst")) {
886 // 'arm.neon.vst*'.
887 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
889 if (vstRegex.match(Name, &Groups)) {
890 static const Intrinsic::ID StoreInts[] = {
891 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
892 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
893
894 static const Intrinsic::ID StoreLaneInts[] = {
895 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
896 Intrinsic::arm_neon_vst4lane};
897
898 auto fArgs = F->getFunctionType()->params();
899 Type *Tys[] = {fArgs[0], fArgs[1]};
900 if (Groups[1].size() == 1)
902 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
903 else
905 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
906 return true;
907 }
908 return false; // No other 'arm.neon.vst*'.
909 }
910
911 return false; // No other 'arm.neon.*'.
912 }
913
914 if (Name.consume_front("mve.")) {
915 // 'arm.mve.*'.
916 if (Name == "vctp64") {
917 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
918 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
919 // the function and deal with it below in UpgradeIntrinsicCall.
920 rename(F);
921 return true;
922 }
923 return false; // Not 'arm.mve.vctp64'.
924 }
925
926 if (Name.starts_with("vrintn.v")) {
928 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
929 return true;
930 }
931
932 // These too are changed to accept a v2i1 instead of the old v4i1.
933 if (Name.consume_back(".v4i1")) {
934 // 'arm.mve.*.v4i1'.
935 if (Name.consume_back(".predicated.v2i64.v4i32"))
936 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
937 return Name == "mull.int" || Name == "vqdmull";
938
939 if (Name.consume_back(".v2i64")) {
940 // 'arm.mve.*.v2i64.v4i1'
941 bool IsGather = Name.consume_front("vldr.gather.");
942 if (IsGather || Name.consume_front("vstr.scatter.")) {
943 if (Name.consume_front("base.")) {
944 // Optional 'wb.' prefix.
945 Name.consume_front("wb.");
946 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
947 // predicated.v2i64.v2i64.v4i1'.
948 return Name == "predicated.v2i64";
949 }
950
951 if (Name.consume_front("offset.predicated."))
952 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
953 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
954
955 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
956 return false;
957 }
958
959 return false; // No other 'arm.mve.*.v2i64.v4i1'.
960 }
961 return false; // No other 'arm.mve.*.v4i1'.
962 }
963 return false; // No other 'arm.mve.*'.
964 }
965
966 if (Name.consume_front("cde.vcx")) {
967 // 'arm.cde.vcx*'.
968 if (Name.consume_back(".predicated.v2i64.v4i1"))
969 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
970 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
971 Name == "3q" || Name == "3qa";
972
973 return false; // No other 'arm.cde.vcx*'.
974 }
975 } else {
976 // 'aarch64.*'.
977 if (Neon) {
978 // 'aarch64.neon.*'.
980 .StartsWith("frintn", Intrinsic::roundeven)
981 .StartsWith("rbit", Intrinsic::bitreverse)
984 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
985 F->arg_begin()->getType());
986 return true;
987 }
988
989 if (Name.starts_with("addp")) {
990 // 'aarch64.neon.addp*'.
991 if (F->arg_size() != 2)
992 return false; // Invalid IR.
993 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
994 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
996 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
997 return true;
998 }
999 }
1000
1001 // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
1002 if (Name.starts_with("bfcvt")) {
1003 NewFn = nullptr;
1004 return true;
1005 }
1006
1007 // vcvtfp2hf and vcvthf2fp -> fpext and fptrunc
1008 if (Name == "vcvtfp2hf" || Name == "vcvthf2fp") {
1009 NewFn = nullptr;
1010 return true;
1011 }
1012
1013 return false; // No other 'aarch64.neon.*'.
1014 }
1015 if (Name.consume_front("sve.")) {
1016 // 'aarch64.sve.*'.
1017 if (Name.consume_front("bf")) {
1018 if (Name == "mmla") {
1019 Type *Tys[] = {F->getReturnType(),
1020 std::next(F->arg_begin())->getType()};
1022 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1023 return true;
1024 }
1025 if (Name.consume_back(".lane")) {
1026 // 'aarch64.sve.bf*.lane'.
1029 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1030 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1031 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1034 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1035 return true;
1036 }
1037 return false; // No other 'aarch64.sve.bf*.lane'.
1038 }
1039 return false; // No other 'aarch64.sve.bf*'.
1040 }
1041
1042 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
1043 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1044 NewFn = nullptr;
1045 return true;
1046 }
1047
1048 if (Name.consume_front("addqv")) {
1049 // 'aarch64.sve.addqv'.
1050 if (!F->getReturnType()->isFPOrFPVectorTy())
1051 return false;
1052
1053 auto Args = F->getFunctionType()->params();
1054 Type *Tys[] = {F->getReturnType(), Args[1]};
1056 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1057 return true;
1058 }
1059
1060 if (Name.consume_front("ld")) {
1061 // 'aarch64.sve.ld*'.
1062 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1063 if (LdRegex.match(Name)) {
1064 Type *ScalarTy =
1065 cast<VectorType>(F->getReturnType())->getElementType();
1066 ElementCount EC =
1067 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
1068 assert(F->arg_size() == 2 &&
1069 "Expected 2 arguments for ld* intrinsic.");
1070 Type *PtrTy = F->getArg(1)->getType();
1071 Type *Ty = VectorType::get(ScalarTy, EC);
1072 static const Intrinsic::ID LoadIDs[] = {
1073 Intrinsic::aarch64_sve_ld2_sret,
1074 Intrinsic::aarch64_sve_ld3_sret,
1075 Intrinsic::aarch64_sve_ld4_sret,
1076 };
1078 F->getParent(), LoadIDs[Name[0] - '2'], {Ty, PtrTy});
1079 return true;
1080 }
1081 return false; // No other 'aarch64.sve.ld*'.
1082 }
1083
1084 if (Name.consume_front("tuple.")) {
1085 // 'aarch64.sve.tuple.*'.
1086 if (Name.starts_with("get")) {
1087 // 'aarch64.sve.tuple.get*'.
1088 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1090 F->getParent(), Intrinsic::vector_extract, Tys);
1091 return true;
1092 }
1093
1094 if (Name.starts_with("set")) {
1095 // 'aarch64.sve.tuple.set*'.
1096 auto Args = F->getFunctionType()->params();
1097 Type *Tys[] = {Args[0], Args[2], Args[1]};
1099 F->getParent(), Intrinsic::vector_insert, Tys);
1100 return true;
1101 }
1102
1103 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1104 if (CreateTupleRegex.match(Name)) {
1105 // 'aarch64.sve.tuple.create*'.
1106 auto Args = F->getFunctionType()->params();
1107 Type *Tys[] = {F->getReturnType(), Args[1]};
1109 F->getParent(), Intrinsic::vector_insert, Tys);
1110 return true;
1111 }
1112 return false; // No other 'aarch64.sve.tuple.*'.
1113 }
1114
1115 if (Name.starts_with("rev.nxv")) {
1116 // 'aarch64.sve.rev.<Ty>'
1118 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
1119 return true;
1120 }
1121
1122 return false; // No other 'aarch64.sve.*'.
1123 }
1124 if (Name.consume_front("sme.")) {
1125 // 'aarch64.sme.*'.
1126 if (Name.consume_front("ftmopa.")) {
1127 // The FP8 FTMOPA intrinsics were split out from the non-FP8 FTMOPA
1128 // intrinsics to model their FPMR dependency.
1131 .Case("za16.nxv16i8", Intrinsic::aarch64_sme_fp8_ftmopa_za16)
1132 .Case("za32.nxv16i8", Intrinsic::aarch64_sme_fp8_ftmopa_za32)
1135 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1136 return true;
1137 }
1138 return false; // No other 'aarch64.sme.ftmopa.*'.
1139 }
1140
1141 return false; // No other 'aarch64.sme.*'.
1142 }
1143 }
1144 return false; // No other 'arm.*', 'aarch64.*'.
1145}
1146
1148 StringRef Name) {
1149 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
1152 .Case("im2col.3d",
1153 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1154 .Case("im2col.4d",
1155 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1156 .Case("im2col.5d",
1157 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1158 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1159 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1160 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1161 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1162 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1164
1166 return ID;
1167
1168 // These intrinsics may need upgrade for two reasons:
1169 // (1) When the address-space of the first argument is shared[AS=3]
1170 // (and we upgrade it to use shared_cluster address-space[AS=7])
1171 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1173 return ID;
1174
1175 // (2) When there are only two boolean flag arguments at the end:
1176 //
1177 // The last three parameters of the older version of these
1178 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1179 //
1180 // The newer version reads as:
1181 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1182 //
1183 // So, when the type of the [N-3]rd argument is "not i1", then
1184 // it is the older version and we need to upgrade.
1185 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1186 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
1187 if (!ArgType->isIntegerTy(1))
1188 return ID;
1189 }
1190
1192}
1193
1195 StringRef Name) {
1196 if (Name.consume_front("mapa.shared.cluster"))
1197 if (F->getReturnType()->getPointerAddressSpace() ==
1199 return Intrinsic::nvvm_mapa_shared_cluster;
1200
1201 if (Name.consume_front("cp.async.bulk.")) {
1204 .Case("global.to.shared.cluster",
1205 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1206 .Case("shared.cta.to.cluster",
1207 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1209
1211 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1213 return ID;
1214 }
1215
1217}
1218
1220 if (Name.consume_front("fma.rn."))
1221 return StringSwitch<Intrinsic::ID>(Name)
1222 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1223 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1224 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1225 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1227
1228 if (Name.consume_front("fmax."))
1229 return StringSwitch<Intrinsic::ID>(Name)
1230 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1231 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1232 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1233 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1234 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1235 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1236 .Case("ftz.nan.xorsign.abs.bf16",
1237 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1238 .Case("ftz.nan.xorsign.abs.bf16x2",
1239 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1240 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1241 .Case("ftz.xorsign.abs.bf16x2",
1242 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1243 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1244 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1245 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1246 .Case("nan.xorsign.abs.bf16x2",
1247 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1248 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1249 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1251
1252 if (Name.consume_front("fmin."))
1253 return StringSwitch<Intrinsic::ID>(Name)
1254 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1255 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1256 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1257 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1258 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1259 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1260 .Case("ftz.nan.xorsign.abs.bf16",
1261 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1262 .Case("ftz.nan.xorsign.abs.bf16x2",
1263 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1264 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1265 .Case("ftz.xorsign.abs.bf16x2",
1266 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1267 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1268 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1269 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1270 .Case("nan.xorsign.abs.bf16x2",
1271 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1272 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1273 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1275
1276 if (Name.consume_front("neg."))
1277 return StringSwitch<Intrinsic::ID>(Name)
1278 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1279 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1281
1283}
1284
1286 return Name.consume_front("local") || Name.consume_front("shared") ||
1287 Name.consume_front("global") || Name.consume_front("constant") ||
1288 Name.consume_front("param");
1289}
1290
1292 const FunctionType *FuncTy) {
1293 Type *HalfTy = Type::getHalfTy(FuncTy->getContext());
1294 if (Name.starts_with("to.fp16")) {
1295 return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0),
1296 HalfTy) &&
1297 CastInst::castIsValid(Instruction::BitCast, HalfTy,
1298 FuncTy->getReturnType());
1299 }
1300
1301 if (Name.starts_with("from.fp16")) {
1302 return CastInst::castIsValid(Instruction::BitCast, FuncTy->getParamType(0),
1303 HalfTy) &&
1304 CastInst::castIsValid(Instruction::FPExt, HalfTy,
1305 FuncTy->getReturnType());
1306 }
1307
1308 return false;
1309}
1310
1312 bool CanUpgradeDebugIntrinsicsToRecords) {
1313 assert(F && "Illegal to upgrade a non-existent Function.");
1314
1315 StringRef Name = F->getName();
1316
1317 // Quickly eliminate it, if it's not a candidate.
1318 if (!Name.consume_front("llvm.") || Name.empty())
1319 return false;
1320
1321 switch (Name[0]) {
1322 default: break;
1323 case 'a': {
1324 bool IsArm = Name.consume_front("arm.");
1325 if (IsArm || Name.consume_front("aarch64.")) {
1326 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1327 return true;
1328 break;
1329 }
1330
1331 if (Name.consume_front("amdgcn.")) {
1332 if (Name == "alignbit") {
1333 // Target specific intrinsic became redundant
1335 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1336 return true;
1337 }
1338
1339 if (Name.consume_front("atomic.")) {
1340 if (Name.starts_with("inc") || Name.starts_with("dec") ||
1341 Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1342 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1343 // and usub_sat so there's no new declaration.
1344 NewFn = nullptr;
1345 return true;
1346 }
1347 break; // No other 'amdgcn.atomic.*'
1348 }
1349
1350 switch (F->getIntrinsicID()) {
1351 default:
1352 break;
1353 // Legacy wmma iu intrinsics without the optional clamp operand.
1354 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1355 if (F->arg_size() == 7) {
1356 NewFn = nullptr;
1357 return true;
1358 }
1359 break;
1360 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1361 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1362 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1363 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1364 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1365 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1366 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1367 if (F->arg_size() == 8) {
1368 NewFn = nullptr;
1369 return true;
1370 }
1371 break;
1372 }
1373
1374 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1375 Name.consume_front("flat.atomic.")) {
1376 if (Name.starts_with("fadd") ||
1377 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1378 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1379 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1380 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1381 // declaration.
1382 NewFn = nullptr;
1383 return true;
1384 }
1385 }
1386
1387 if (Name.starts_with("ldexp.")) {
1388 // Target specific intrinsic became redundant
1390 F->getParent(), Intrinsic::ldexp,
1391 {F->getReturnType(), F->getArg(1)->getType()});
1392 return true;
1393 }
1394 break; // No other 'amdgcn.*'
1395 }
1396
1397 break;
1398 }
1399 case 'c': {
1400 if (F->arg_size() == 1) {
1401 if (Name.consume_front("convert.")) {
1402 if (convertIntrinsicValidType(Name, F->getFunctionType())) {
1403 NewFn = nullptr;
1404 return true;
1405 }
1406 }
1407
1409 .StartsWith("ctlz.", Intrinsic::ctlz)
1410 .StartsWith("cttz.", Intrinsic::cttz)
1413 rename(F);
1414 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1415 F->arg_begin()->getType());
1416 return true;
1417 }
1418 }
1419
1420 if (F->arg_size() == 2 && Name == "coro.end") {
1421 rename(F);
1422 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1423 Intrinsic::coro_end);
1424 return true;
1425 }
1426
1427 break;
1428 }
1429 case 'd':
1430 if (Name.consume_front("dbg.")) {
1431 // Mark debug intrinsics for upgrade to new debug format.
1432 if (CanUpgradeDebugIntrinsicsToRecords) {
1433 if (Name == "addr" || Name == "value" || Name == "assign" ||
1434 Name == "declare" || Name == "label") {
1435 // There's no function to replace these with.
1436 NewFn = nullptr;
1437 // But we do want these to get upgraded.
1438 return true;
1439 }
1440 }
1441 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1442 // converted to DbgVariableRecords later.
1443 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1444 rename(F);
1445 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1446 Intrinsic::dbg_value);
1447 return true;
1448 }
1449 break; // No other 'dbg.*'.
1450 }
1451 break;
1452 case 'e':
1453 if (Name.consume_front("experimental.vector.")) {
1456 // Skip over extract.last.active, otherwise it will be 'upgraded'
1457 // to a regular vector extract which is a different operation.
1458 .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1459 .StartsWith("extract.", Intrinsic::vector_extract)
1460 .StartsWith("insert.", Intrinsic::vector_insert)
1461 .StartsWith("reverse.", Intrinsic::vector_reverse)
1462 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1463 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1464 .StartsWith("partial.reduce.add",
1465 Intrinsic::vector_partial_reduce_add)
1468 const auto *FT = F->getFunctionType();
1470 if (ID == Intrinsic::vector_extract ||
1471 ID == Intrinsic::vector_interleave2)
1472 // Extracting overloads the return type.
1473 Tys.push_back(FT->getReturnType());
1474 if (ID != Intrinsic::vector_interleave2)
1475 Tys.push_back(FT->getParamType(0));
1476 if (ID == Intrinsic::vector_insert ||
1477 ID == Intrinsic::vector_partial_reduce_add)
1478 // Inserting overloads the inserted type.
1479 Tys.push_back(FT->getParamType(1));
1480 rename(F);
1481 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1482 return true;
1483 }
1484
1485 if (Name.consume_front("reduce.")) {
1487 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1488 if (R.match(Name, &Groups))
1490 .Case("add", Intrinsic::vector_reduce_add)
1491 .Case("mul", Intrinsic::vector_reduce_mul)
1492 .Case("and", Intrinsic::vector_reduce_and)
1493 .Case("or", Intrinsic::vector_reduce_or)
1494 .Case("xor", Intrinsic::vector_reduce_xor)
1495 .Case("smax", Intrinsic::vector_reduce_smax)
1496 .Case("smin", Intrinsic::vector_reduce_smin)
1497 .Case("umax", Intrinsic::vector_reduce_umax)
1498 .Case("umin", Intrinsic::vector_reduce_umin)
1499 .Case("fmax", Intrinsic::vector_reduce_fmax)
1500 .Case("fmin", Intrinsic::vector_reduce_fmin)
1502
1503 bool V2 = false;
1505 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1506 Groups.clear();
1507 V2 = true;
1508 if (R2.match(Name, &Groups))
1510 .Case("fadd", Intrinsic::vector_reduce_fadd)
1511 .Case("fmul", Intrinsic::vector_reduce_fmul)
1513 }
1515 rename(F);
1516 auto Args = F->getFunctionType()->params();
1517 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1518 {Args[V2 ? 1 : 0]});
1519 return true;
1520 }
1521 break; // No other 'expermental.vector.reduce.*'.
1522 }
1523
1524 if (Name.consume_front("splice"))
1525 return true;
1526 break; // No other 'experimental.vector.*'.
1527 }
1528 if (Name.consume_front("experimental.stepvector.")) {
1529 Intrinsic::ID ID = Intrinsic::stepvector;
1530 rename(F);
1532 F->getParent(), ID, F->getFunctionType()->getReturnType());
1533 return true;
1534 }
1535 break; // No other 'e*'.
1536 case 'f':
1537 if (Name.starts_with("flt.rounds")) {
1538 rename(F);
1539 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1540 Intrinsic::get_rounding);
1541 return true;
1542 }
1543 break;
1544 case 'i':
1545 if (Name.starts_with("invariant.group.barrier")) {
1546 // Rename invariant.group.barrier to launder.invariant.group
1547 auto Args = F->getFunctionType()->params();
1548 Type* ObjectPtr[1] = {Args[0]};
1549 rename(F);
1551 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1552 return true;
1553 }
1554 break;
1555 case 'l':
1556 if ((Name.starts_with("lifetime.start") ||
1557 Name.starts_with("lifetime.end")) &&
1558 F->arg_size() == 2) {
1559 Intrinsic::ID IID = Name.starts_with("lifetime.start")
1560 ? Intrinsic::lifetime_start
1561 : Intrinsic::lifetime_end;
1562 rename(F);
1563 // Old 2 argument form of these intrinsics have [Size, Ptr] as arguments.
1564 // Use the Ptr argument to create new declaration.
1565 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1566 F->getArg(1)->getType());
1567 return true;
1568 }
1569 break;
1570 case 'm': {
1571 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1572 // alignment parameter to embedding the alignment as an attribute of
1573 // the pointer args.
1574 if (unsigned ID = StringSwitch<unsigned>(Name)
1575 .StartsWith("memcpy.", Intrinsic::memcpy)
1576 .StartsWith("memmove.", Intrinsic::memmove)
1577 .Default(0)) {
1578 if (F->arg_size() == 5) {
1579 rename(F);
1580 // Get the types of dest, src, and len
1581 ArrayRef<Type *> ParamTypes =
1582 F->getFunctionType()->params().slice(0, 3);
1583 NewFn =
1584 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1585 return true;
1586 }
1587 }
1588 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1589 rename(F);
1590 // Get the types of dest, and len
1591 const auto *FT = F->getFunctionType();
1592 Type *ParamTypes[2] = {
1593 FT->getParamType(0), // Dest
1594 FT->getParamType(2) // len
1595 };
1596 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1597 Intrinsic::memset, ParamTypes);
1598 return true;
1599 }
1600
1601 unsigned MaskedID =
1603 .StartsWith("masked.load", Intrinsic::masked_load)
1604 .StartsWith("masked.gather", Intrinsic::masked_gather)
1605 .StartsWith("masked.store", Intrinsic::masked_store)
1606 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1607 .Default(0);
1608 if (MaskedID && F->arg_size() == 4) {
1609 rename(F);
1610 if (MaskedID == Intrinsic::masked_load ||
1611 MaskedID == Intrinsic::masked_gather) {
1613 F->getParent(), MaskedID,
1614 {F->getReturnType(), F->getArg(0)->getType()});
1615 return true;
1616 }
1618 F->getParent(), MaskedID,
1619 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1620 return true;
1621 }
1622 break;
1623 }
1624 case 'n': {
1625 if (Name.consume_front("nvvm.")) {
1626 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1627 if (F->arg_size() == 1) {
1628 Intrinsic::ID IID =
1630 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1631 .Case("clz.i", Intrinsic::ctlz)
1632 .Case("popc.i", Intrinsic::ctpop)
1634 if (IID != Intrinsic::not_intrinsic) {
1635 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1636 {F->getReturnType()});
1637 return true;
1638 }
1639 } else if (F->arg_size() == 2) {
1640 Intrinsic::ID IID =
1642 .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
1643 .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
1644 .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
1645 .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
1647 if (IID != Intrinsic::not_intrinsic) {
1648 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1649 {F->getReturnType()});
1650 return true;
1651 }
1652 }
1653
1654 // Check for nvvm intrinsics that need a return type adjustment.
1655 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1657 if (IID != Intrinsic::not_intrinsic) {
1658 NewFn = nullptr;
1659 return true;
1660 }
1661 }
1662
1663 // Upgrade Distributed Shared Memory Intrinsics
1665 if (IID != Intrinsic::not_intrinsic) {
1666 rename(F);
1667 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1668 return true;
1669 }
1670
1671 // Upgrade TMA copy G2S Intrinsics
1673 if (IID != Intrinsic::not_intrinsic) {
1674 rename(F);
1675 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1676 return true;
1677 }
1678
1679 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1680 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1681 //
1682 // TODO: We could add lohi.i2d.
1683 bool Expand = false;
1684 if (Name.consume_front("abs."))
1685 // nvvm.abs.{i,ii}
1686 Expand =
1687 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1688 else if (Name.consume_front("fabs."))
1689 // nvvm.fabs.{f,ftz.f,d}
1690 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1691 else if (Name.consume_front("ex2.approx."))
1692 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1693 Expand =
1694 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1695 else if (Name.consume_front("atomic.load."))
1696 // nvvm.atomic.load.add.{f32,f64}.p
1697 // nvvm.atomic.load.{inc,dec}.32.p
1698 Expand = StringSwitch<bool>(Name)
1699 .StartsWith("add.f32.p", true)
1700 .StartsWith("add.f64.p", true)
1701 .StartsWith("inc.32.p", true)
1702 .StartsWith("dec.32.p", true)
1703 .Default(false);
1704 else if (Name.consume_front("atomic."))
1705 // nvvm.atomic.{add,exch,max,min,inc,dec,and,or,xor}.gen.{i,f}.{cta,sys}
1706 // nvvm.atomic.cas.gen.i.{cta,sys}
1707 Expand = StringSwitch<bool>(Name)
1708 .StartsWith("add.gen.", true)
1709 .StartsWith("exch.gen.", true)
1710 .StartsWith("max.gen.", true)
1711 .StartsWith("min.gen.", true)
1712 .StartsWith("inc.gen.", true)
1713 .StartsWith("dec.gen.", true)
1714 .StartsWith("and.gen.", true)
1715 .StartsWith("or.gen.", true)
1716 .StartsWith("xor.gen.", true)
1717 .StartsWith("cas.gen.", true)
1718 .Default(false);
1719 else if (Name.consume_front("bitcast."))
1720 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1721 Expand =
1722 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1723 else if (Name.consume_front("rotate."))
1724 // nvvm.rotate.{b32,b64,right.b64}
1725 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1726 else if (Name.consume_front("ptr.gen.to."))
1727 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1728 Expand = consumeNVVMPtrAddrSpace(Name);
1729 else if (Name.consume_front("ptr."))
1730 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1731 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1732 else if (Name.consume_front("ldg.global."))
1733 // nvvm.ldg.global.{i,p,f}
1734 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1735 Name.starts_with("p."));
1736 else
1737 Expand = StringSwitch<bool>(Name)
1738 .Case("barrier0", true)
1739 .Case("barrier.n", true)
1740 .Case("barrier.sync.cnt", true)
1741 .Case("barrier.sync", true)
1742 .Case("barrier", true)
1743 .Case("bar.sync", true)
1744 .Case("barrier0.popc", true)
1745 .Case("barrier0.and", true)
1746 .Case("barrier0.or", true)
1747 .Case("clz.ll", true)
1748 .Case("popc.ll", true)
1749 .Case("h2f", true)
1750 .Case("swap.lo.hi.b64", true)
1751 .Case("tanh.approx.f32", true)
1752 .Default(false);
1753
1754 if (Expand) {
1755 NewFn = nullptr;
1756 return true;
1757 }
1758 break; // No other 'nvvm.*'.
1759 }
1760 break;
1761 }
1762 case 'o':
1763 if (Name.starts_with("objectsize.")) {
1764 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1765 if (F->arg_size() == 2 || F->arg_size() == 3) {
1766 rename(F);
1767 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1768 Intrinsic::objectsize, Tys);
1769 return true;
1770 }
1771 }
1772 break;
1773
1774 case 'p':
1775 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1776 rename(F);
1778 F->getParent(), Intrinsic::ptr_annotation,
1779 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1780 return true;
1781 }
1782 break;
1783
1784 case 'r': {
1785 if (Name.consume_front("riscv.")) {
1788 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1789 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1790 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1791 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1794 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1795 rename(F);
1796 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1797 return true;
1798 }
1799 break; // No other applicable upgrades.
1800 }
1801
1803 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1804 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1807 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1808 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1809 rename(F);
1810 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1811 return true;
1812 }
1813 break; // No other applicable upgrades.
1814 }
1815
1817 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1818 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1819 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1820 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1821 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1822 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1825 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1826 rename(F);
1827 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1828 return true;
1829 }
1830 break; // No other applicable upgrades.
1831 }
1832
1833 // Replace llvm.riscv.clmul with llvm.clmul.
1834 if (Name == "clmul.i32" || Name == "clmul.i64") {
1836 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1837 return true;
1838 }
1839
1840 break; // No other 'riscv.*' intrinsics
1841 }
1842 } break;
1843
1844 case 's':
1845 if (Name == "stackprotectorcheck") {
1846 NewFn = nullptr;
1847 return true;
1848 }
1849 break;
1850
1851 case 't':
1852 if (Name == "thread.pointer") {
1854 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1855 return true;
1856 }
1857 break;
1858
1859 case 'v': {
1860 if (Name == "var.annotation" && F->arg_size() == 4) {
1861 rename(F);
1863 F->getParent(), Intrinsic::var_annotation,
1864 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1865 return true;
1866 }
1867 if (Name.consume_front("vector.splice")) {
1868 if (Name.starts_with(".left") || Name.starts_with(".right"))
1869 break;
1870 return true;
1871 }
1872 break;
1873 }
1874
1875 case 'w':
1876 if (Name.consume_front("wasm.")) {
1879 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1880 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1881 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1884 rename(F);
1885 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1886 F->getReturnType());
1887 return true;
1888 }
1889
1890 if (Name.consume_front("dot.i8x16.i7x16.")) {
1892 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1893 .Case("add.signed",
1894 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1897 rename(F);
1898 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1899 return true;
1900 }
1901 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1902 }
1903 break; // No other 'wasm.*'.
1904 }
1905 break;
1906
1907 case 'x':
1908 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1909 return true;
1910 }
1911
1912 auto *ST = dyn_cast<StructType>(F->getReturnType());
1913 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1914 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1915 // Replace return type with literal non-packed struct. Only do this for
1916 // intrinsics declared to return a struct, not for intrinsics with
1917 // overloaded return type, in which case the exact struct type will be
1918 // mangled into the name.
1919 if (Intrinsic::hasStructReturnType(F->getIntrinsicID())) {
1920 FunctionType *FT = F->getFunctionType();
1921 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1922 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1923 std::string Name = F->getName().str();
1924 rename(F);
1925 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1926 Name, F->getParent());
1927
1928 // The new function may also need remangling.
1929 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1930 NewFn = *Result;
1931 return true;
1932 }
1933 }
1934
1935 // Remangle our intrinsic since we upgrade the mangling
1937 if (Result != std::nullopt) {
1938 NewFn = *Result;
1939 return true;
1940 }
1941
1942 // This may not belong here. This function is effectively being overloaded
1943 // to both detect an intrinsic which needs upgrading, and to provide the
1944 // upgraded form of the intrinsic. We should perhaps have two separate
1945 // functions for this.
1946 return false;
1947}
1948
// NOTE(review): the opening line of this signature (return type, function name
// and the F / NewFn parameters used below) is not present in this listing.
//
// Clears NewFn, delegates the actual upgrade decision to
// upgradeIntrinsicFunction1, then refreshes the intrinsic attributes on the
// resulting function (NewFn when one was produced, otherwise F itself).
// Returns whatever upgradeIntrinsicFunction1 returned.
1950 bool CanUpgradeDebugIntrinsicsToRecords) {
1951 NewFn = nullptr;
1952 bool Upgraded =
1953 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1954
1955 // Upgrade intrinsic attributes. This does not change the function.
// From here on F refers to the replacement declaration if there is one.
1956 if (NewFn)
1957 F = NewFn;
// An ID of zero means "not an intrinsic", in which case nothing is updated.
1958 if (Intrinsic::ID id = F->getIntrinsicID()) {
1959 // Only do this if the intrinsic signature is valid.
// OverloadTys is only an out-parameter of the validity check; it is not read.
1960 SmallVector<Type *> OverloadTys;
1961 if (Intrinsic::isSignatureValid(id, F->getFunctionType(), OverloadTys))
1962 F->setAttributes(
1963 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1964 }
1965 return Upgraded;
1966}
1967
// NOTE(review): several lines of this function are not present in this
// listing: the signature (which introduces GV), the definitions of ATy and
// STy, and the last operand of the ConstantStruct::get call in the loop.
//
// Builds a replacement for an "llvm.global_ctors" / "llvm.global_dtors"
// variable whose entries are two-field structs, using a three-field struct
// whose extra field has pointer type. Returns nullptr when GV is not one of
// those variables, has no initializer, or does not have the expected shape.
1969 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1970 GV->getName() == "llvm.global_dtors")) ||
1971 !GV->hasInitializer())
1972 return nullptr;
// NOTE(review): ATy is defined on a line missing from this listing.
1974 if (!ATy)
1975 return nullptr;
// NOTE(review): STy is defined on a line missing from this listing. Only the
// two-field form is rewritten; anything else is left alone.
1977 if (!STy || STy->getNumElements() != 2)
1978 return nullptr;
1979
1980 LLVMContext &C = GV->getContext();
1981 IRBuilder<> IRB(C);
// New element type: the two existing field types followed by a pointer field.
1982 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1983 IRB.getPtrTy());
1984 Constant *Init = GV->getInitializer();
1985 unsigned N = Init->getNumOperands();
1986 std::vector<Constant *> NewCtors(N);
// Rebuild every entry, carrying over its two existing fields.
1987 for (unsigned i = 0; i != N; ++i) {
1988 auto Ctor = cast<Constant>(Init->getOperand(i));
1989 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1990 Ctor->getAggregateElement(1),
// NOTE(review): the value supplied for the new third field is on a line
// missing from this listing.
1992 }
1993 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1994
// The replacement keeps GV's linkage and name and is created non-constant.
1995 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1996 NewInit, GV->getName());
1997}
1998
1999// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
2000// to byte shuffles.
// NOTE(review): the first line of the signature (return type, function name
// and the Builder / Op parameters used below) is not present in this listing.
//
// Op is reinterpreted as a byte vector, shuffled together with an all-zero
// vector according to Shift, and the result is cast back to Op's original
// type. A Shift of 16 or more yields the zero vector.
2002 unsigned Shift) {
2003 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// Number of bytes in the vector: eight per (64-bit) element.
2004 unsigned NumElts = ResultTy->getNumElements() * 8;
2005
2006 // Bitcast from a 64-bit element type to a byte element type.
2007 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
2008 Op = Builder.CreateBitCast(Op, VecTy, "cast");
2009
2010 // We'll be shuffling in zeroes.
2011 Value *Res = Constant::getNullValue(VecTy);
2012
2013 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
2014 // we'll just return the zero vector.
2015 if (Shift < 16) {
// 64 entries covers the widest case handled here (512 bits = 64 bytes).
2016 int Idxs[64];
2017 // 256/512-bit version is split into 2/4 16-byte lanes.
2018 for (unsigned l = 0; l != NumElts; l += 16)
2019 for (unsigned i = 0; i != 16; ++i) {
// Shuffle operands are (zero vector, Op): indices >= NumElts read from Op,
// smaller indices read from the zero vector.
2020 unsigned Idx = NumElts + i - Shift;
2021 if (Idx < NumElts)
2022 Idx -= NumElts - 16; // end of lane, switch operand.
2023 Idxs[l + i] = Idx + l;
2024 }
2025
2026 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
2027 }
2028
2029 // Bitcast back to a 64-bit element type.
2030 return Builder.CreateBitCast(Res, ResultTy, "cast");
2031}
2032
2033// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
2034// to byte shuffles.
// NOTE(review): the first line of the signature (return type, function name
// and the Builder / Op parameters used below) is not present in this listing.
//
// Op is reinterpreted as a byte vector, shuffled together with an all-zero
// vector according to Shift, and the result is cast back to Op's original
// type. A Shift of 16 or more yields the zero vector.
2036 unsigned Shift) {
2037 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// Number of bytes in the vector: eight per (64-bit) element.
2038 unsigned NumElts = ResultTy->getNumElements() * 8;
2039
2040 // Bitcast from a 64-bit element type to a byte element type.
2041 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
2042 Op = Builder.CreateBitCast(Op, VecTy, "cast");
2043
2044 // We'll be shuffling in zeroes.
2045 Value *Res = Constant::getNullValue(VecTy);
2046
2047 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
2048 // we'll just return the zero vector.
2049 if (Shift < 16) {
// 64 entries covers the widest case handled here (512 bits = 64 bytes).
2050 int Idxs[64];
2051 // 256/512-bit version is split into 2/4 16-byte lanes.
2052 for (unsigned l = 0; l != NumElts; l += 16)
2053 for (unsigned i = 0; i != 16; ++i) {
// Shuffle operands are (Op, zero vector): indices < NumElts read from Op,
// larger indices read from the zero vector.
2054 unsigned Idx = i + Shift;
2055 if (Idx >= 16)
2056 Idx += NumElts - 16; // end of lane, switch operand.
2057 Idxs[l + i] = Idx + l;
2058 }
2059
2060 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
2061 }
2062
2063 // Bitcast back to a 64-bit element type.
2064 return Builder.CreateBitCast(Res, ResultTy, "cast");
2065}
2066
2067static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2068 unsigned NumElts) {
2069 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
2071 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
2072 Mask = Builder.CreateBitCast(Mask, MaskTy);
2073
2074 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2075 // i8 and we need to extract down to the right number of elements.
2076 if (NumElts <= 4) {
2077 int Indices[4];
2078 for (unsigned i = 0; i != NumElts; ++i)
2079 Indices[i] = i;
2080 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
2081 "extract");
2082 }
2083
2084 return Mask;
2085}
2086
2087static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2088 Value *Op1) {
2089 // If the mask is all ones just emit the first operation.
2090 if (const auto *C = dyn_cast<Constant>(Mask))
2091 if (C->isAllOnesValue())
2092 return Op0;
2093
2094 Mask = getX86MaskVec(Builder, Mask,
2095 cast<FixedVectorType>(Op0->getType())->getNumElements());
2096 return Builder.CreateSelect(Mask, Op0, Op1);
2097}
2098
2099static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2100 Value *Op1) {
2101 // If the mask is all ones just emit the first operation.
2102 if (const auto *C = dyn_cast<Constant>(Mask))
2103 if (C->isAllOnesValue())
2104 return Op0;
2105
2106 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2107 Mask->getType()->getIntegerBitWidth());
2108 Mask = Builder.CreateBitCast(Mask, MaskTy);
2109 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2110 return Builder.CreateSelect(Mask, Op0, Op1);
2111}
2112
2113// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2114// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2115// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// NOTE(review): the first line of the signature (return type, function name
// and the Builder / Op0 parameters used below) is not present in this listing,
// and two statements inside the body are missing as well (flagged below).
//
// Builds a shuffle of (Op1, Op0) whose indices are derived from the constant
// Shift, then selects between that shuffle and Passthru under Mask.
2117 Value *Op1, Value *Shift,
2118 Value *Passthru, Value *Mask,
2119 bool IsVALIGN) {
// Shift must be a ConstantInt; cast<> asserts otherwise.
2120 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2121
2122 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2123 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2124 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2125 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2126
2127 // Mask the immediate for VALIGN.
2128 if (IsVALIGN)
2129 ShiftVal &= (NumElts - 1);
2130
2131 // If palignr is shifting the pair of vectors more than the size of two
2132 // lanes, emit zero.
// NOTE(review): the statement guarded by this condition is missing from this
// listing; per the comment above it presumably returns a zero value.
2133 if (ShiftVal >= 32)
2135
2136 // If palignr is shifting the pair of input vectors more than one lane,
2137 // but less than two lanes, convert to shifting in zeroes.
2138 if (ShiftVal > 16) {
2139 ShiftVal -= 16;
2140 Op1 = Op0;
// NOTE(review): one statement of this branch is missing from this listing;
// per the comment above it presumably replaces Op0 with zeroes.
2142 }
2143
// 64 entries is the largest NumElts the asserts above are expected to allow.
2144 int Indices[64];
2145 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2146 for (unsigned l = 0; l < NumElts; l += 16) {
2147 for (unsigned i = 0; i != 16; ++i) {
2148 unsigned Idx = ShiftVal + i;
2149 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2150 Idx += NumElts - 16; // End of lane, switch operand.
2151 Indices[l + i] = Idx + l;
2152 }
2153 }
2154
// Note the operand order: Op1 is the first shuffle input, Op0 the second.
2155 Value *Align = Builder.CreateShuffleVector(
2156 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2157
2158 return emitX86Select(Builder, Mask, Align, Passthru);
2159}
2160
// NOTE(review): the first line of the signature (return type, function name
// and the Builder / CI parameters used below) is not present in this listing.
//
// Re-emits a masked two-source permute call as the matching unmasked
// x86_avx512_vpermi2var_* intrinsic followed by a select. The intrinsic is
// chosen from the result's total width, element width and (for 32/64-bit
// elements) whether the element type is floating point.
//   ZeroMask  - use an all-zero passthru instead of call operand 1.
//   IndexForm - when false, call operands 0 and 1 are swapped before the call.
2162 bool ZeroMask, bool IndexForm) {
2163 Type *Ty = CI.getType();
2164 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2165 unsigned EltWidth = Ty->getScalarSizeInBits();
2166 bool IsFloat = Ty->isFPOrFPVectorTy();
2167 Intrinsic::ID IID;
2168 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2169 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2170 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2171 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2172 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2173 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2174 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2175 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2176 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2177 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2178 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2179 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2180 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2181 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2182 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2183 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2184 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2185 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2186 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2187 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2188 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2189 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2190 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2191 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
// 16- and 8-bit element variants do not distinguish on IsFloat.
2192 else if (VecWidth == 128 && EltWidth == 16)
2193 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2194 else if (VecWidth == 256 && EltWidth == 16)
2195 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2196 else if (VecWidth == 512 && EltWidth == 16)
2197 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2198 else if (VecWidth == 128 && EltWidth == 8)
2199 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2200 else if (VecWidth == 256 && EltWidth == 8)
2201 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2202 else if (VecWidth == 512 && EltWidth == 8)
2203 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2204 else
2205 llvm_unreachable("Unexpected intrinsic");
2206
2207 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2208 CI.getArgOperand(2) };
2209
2210 // If this isn't index form we need to swap operand 0 and 1.
2211 if (!IndexForm)
2212 std::swap(Args[0], Args[1]);
2213
2214 Value *V = Builder.CreateIntrinsic(IID, Args);
// The passthru always comes from the ORIGINAL operand 1 (not the possibly
// swapped Args[1]), bitcast to the result type, unless zero-masking.
2215 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2216 : Builder.CreateBitCast(CI.getArgOperand(1),
2217 Ty);
// Operand 3 of the original call is the mask.
2218 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2219}
2220
// NOTE(review): the first line of the signature (return type, function name
// and the Builder / CI parameters used below) is not present in this listing.
//
// Re-emits the call CI as intrinsic IID applied to its first two operands,
// overloaded on CI's result type. If CI has four arguments, the result is
// additionally selected against operand 2 under the mask in operand 3.
2222 Intrinsic::ID IID) {
2223 Type *Ty = CI.getType();
2224 Value *Op0 = CI.getOperand(0);
2225 Value *Op1 = CI.getOperand(1);
2226 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2227
2228 if (CI.arg_size() == 4) { // For masked intrinsics.
2229 Value *VecSrc = CI.getOperand(2);
2230 Value *Mask = CI.getOperand(3);
2231 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2232 }
2233 return Res;
2234}
2235
// NOTE(review): the first line of the signature (return type, function name
// and the Builder / CI parameters used below) is not present in this listing.
//
// Re-emits a rotate call as a funnel shift with both data inputs equal to the
// source: fshr when IsRotateRight is set, fshl otherwise. If CI has four
// arguments, the result is selected against operand 2 under the mask in
// operand 3.
2237 bool IsRotateRight) {
2238 Type *Ty = CI.getType();
2239 Value *Src = CI.getArgOperand(0);
2240 Value *Amt = CI.getArgOperand(1);
2241
2242 // Amount may be scalar immediate, in which case create a splat vector.
2243 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2244 // we only care about the lowest log2 bits anyway.
2245 if (Amt->getType() != Ty) {
2246 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// Resize the scalar to the element type (as unsigned) before splatting.
2247 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2248 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2249 }
2250
2251 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2252 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2253
2254 if (CI.arg_size() == 4) { // For masked intrinsics.
2255 Value *VecSrc = CI.getOperand(2);
2256 Value *Mask = CI.getOperand(3);
2257 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2258 }
2259 return Res;
2260}
2261
2262static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2263 bool IsSigned) {
2264 Type *Ty = CI.getType();
2265 Value *LHS = CI.getArgOperand(0);
2266 Value *RHS = CI.getArgOperand(1);
2267
2268 CmpInst::Predicate Pred;
2269 switch (Imm) {
2270 case 0x0:
2271 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2272 break;
2273 case 0x1:
2274 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2275 break;
2276 case 0x2:
2277 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2278 break;
2279 case 0x3:
2280 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2281 break;
2282 case 0x4:
2283 Pred = ICmpInst::ICMP_EQ;
2284 break;
2285 case 0x5:
2286 Pred = ICmpInst::ICMP_NE;
2287 break;
2288 case 0x6:
2289 return Constant::getNullValue(Ty); // FALSE
2290 case 0x7:
2291 return Constant::getAllOnesValue(Ty); // TRUE
2292 default:
2293 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2294 }
2295
2296 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2297 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2298 return Ext;
2299}
2300
// NOTE(review): the first line of the signature (return type, function name
// and the Builder / CI parameters used below) is not present in this listing.
//
// Re-emits a two-input shift call as a funnel shift: fshr when IsShiftRight is
// set (with the two data operands swapped), fshl otherwise. For calls with
// four or more arguments the result is selected under the mask held in the
// last argument.
2302 bool IsShiftRight, bool ZeroMask) {
2303 Type *Ty = CI.getType();
2304 Value *Op0 = CI.getArgOperand(0);
2305 Value *Op1 = CI.getArgOperand(1);
2306 Value *Amt = CI.getArgOperand(2);
2307
2308 if (IsShiftRight)
2309 std::swap(Op0, Op1);
2310
2311 // Amount may be scalar immediate, in which case create a splat vector.
2312 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2313 // we only care about the lowest log2 bits anyway.
2314 if (Amt->getType() != Ty) {
2315 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// Resize the scalar to the element type (as unsigned) before splatting.
2316 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2317 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2318 }
2319
2320 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2321 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2322
2323 unsigned NumArgs = CI.arg_size();
2324 if (NumArgs >= 4) { // For masked intrinsics.
// Passthru: the explicit operand 3 for five-argument calls; otherwise zero
// when ZeroMask is set, else the call's original (unswapped) operand 0.
2325 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2326 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2327 CI.getArgOperand(0);
2328 Value *Mask = CI.getOperand(NumArgs - 1);
2329 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2330 }
2331 return Res;
2332}
2333
// NOTE(review): the first line of the signature (return type, function name
// and the Builder / Ptr / Data parameters used below) is not present in this
// listing.
//
// Stores Data to Ptr under the integer bitmask Mask: a plain aligned store
// when Mask is an all-ones constant, otherwise a masked store.
2335 Value *Mask, bool Aligned) {
// "Aligned" means aligned to the full size of the stored type in bytes;
// otherwise byte alignment is assumed.
2336 const Align Alignment =
2337 Aligned
2338 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2339 : Align(1);
2340
2341 // If the mask is all ones just emit a regular store.
2342 if (const auto *C = dyn_cast<Constant>(Mask))
2343 if (C->isAllOnesValue())
2344 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2345
2346 // Convert the mask from an integer type to a vector of i1.
2347 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2348 Mask = getX86MaskVec(Builder, Mask, NumElts);
2349 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2350}
2351
// NOTE(review): the first line of the signature (return type, function name
// and the Builder / Ptr parameters used below) is not present in this listing.
//
// Loads a value of Passthru's type from Ptr under the integer bitmask Mask: a
// plain aligned load when Mask is an all-ones constant, otherwise a masked
// load that yields Passthru in the disabled lanes.
2353 Value *Passthru, Value *Mask, bool Aligned) {
2354 Type *ValTy = Passthru->getType();
2355 const Align Alignment =
2356 Aligned
2357 ? Align(
// NOTE(review): the dividend of this expression is on a line missing from
// this listing; presumably the size of ValTy in bits, divided by 8 below.
2359 8)
2360 : Align(1);
2361
2362 // If the mask is all ones just emit a regular load.
2363 if (const auto *C = dyn_cast<Constant>(Mask))
2364 if (C->isAllOnesValue())
2365 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2366
2367 // Convert the mask from an integer type to a vector of i1.
2368 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2369 Mask = getX86MaskVec(Builder, Mask, NumElts);
2370 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2371}
2372
2373static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2374 Type *Ty = CI.getType();
2375 Value *Op0 = CI.getArgOperand(0);
2376 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2377 {Op0, Builder.getInt1(false)});
2378 if (CI.arg_size() == 3)
2379 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2380 return Res;
2381}
2382
2383static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2384 Type *Ty = CI.getType();
2385
2386 // Arguments have a vXi32 type so cast to vXi64.
2387 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2388 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2389
2390 if (IsSigned) {
2391 // Shift left then arithmetic shift right.
2392 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2393 LHS = Builder.CreateShl(LHS, ShiftAmt);
2394 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2395 RHS = Builder.CreateShl(RHS, ShiftAmt);
2396 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2397 } else {
2398 // Clear the upper bits.
2399 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2400 LHS = Builder.CreateAnd(LHS, Mask);
2401 RHS = Builder.CreateAnd(RHS, Mask);
2402 }
2403
2404 Value *Res = Builder.CreateMul(LHS, RHS);
2405
2406 if (CI.arg_size() == 4)
2407 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2408
2409 return Res;
2410}
2411
2412// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// NOTE(review): the first line of the signature (return type, function name
// and the Builder / Vec parameters used below) is not present in this listing.
//
// Mask may be null, in which case no masking is applied. The returned value is
// Vec bitcast to an integer of max(NumElts, 8) bits.
2414 Value *Mask) {
2415 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2416 if (Mask) {
// An all-ones constant mask would be a no-op, so the AND is skipped for it.
2417 const auto *C = dyn_cast<Constant>(Mask);
2418 if (!C || !C->isAllOnesValue())
2419 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2420 }
2421
// Fewer than 8 lanes: widen to 8 lanes with a shuffle so the final bitcast
// produces an i8.
2422 if (NumElts < 8) {
2423 int Indices[8];
2424 for (unsigned i = 0; i != NumElts; ++i)
2425 Indices[i] = i;
// Padding lanes use indices >= NumElts, i.e. they read from the second
// shuffle operand.
2426 for (unsigned i = NumElts; i != 8; ++i)
2427 Indices[i] = NumElts + i % NumElts;
2428 Vec = Builder.CreateShuffleVector(Vec,
// NOTE(review): the second shuffle operand is on a line missing from this
// listing.
2430 Indices);
2431 }
2432 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2433}
2434
// NOTE(review): several lines of this function are not present in this
// listing: the first line of the signature (which introduces Builder and CI),
// the assignments to Cmp in the CC == 3 and CC == 7 branches, and the
// declaration of Pred.
//
// Re-emits a masked integer compare call as an icmp on operands 0 and 1 (or a
// constant vector of i1 for CC 3 and 7), then applies the mask held in the
// call's last argument via applyX86MaskOn1BitsVec.
2436 unsigned CC, bool Signed) {
2437 Value *Op0 = CI.getArgOperand(0);
2438 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2439
2440 Value *Cmp;
// NOTE(review): for CC 3 and 7 only the vector-of-i1 type argument survives in
// this listing; presumably these produce constant false / true vectors.
2441 if (CC == 3) {
2443 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2444 } else if (CC == 7) {
2446 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2447 } else {
// Map the remaining condition codes to icmp predicates; Signed picks the
// signed flavour of the ordering predicates.
2449 switch (CC) {
2450 default: llvm_unreachable("Unknown condition code");
2451 case 0: Pred = ICmpInst::ICMP_EQ; break;
2452 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2453 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2454 case 4: Pred = ICmpInst::ICMP_NE; break;
2455 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2456 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2457 }
2458 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2459 }
2460
// The mask is always the final call argument.
2461 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2462
2463 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2464}
2465
2466// Replace a masked intrinsic with an older unmasked intrinsic.
// NOTE(review): the first line of the signature (return type, function name
// and the Builder / CI parameters used below) is not present in this listing.
//
// Calls intrinsic IID on operands 0 and 1 of CI, then selects between that
// result and operand 2 (the passthru) under the mask in operand 3.
2468 Intrinsic::ID IID) {
2469 Value *Rep =
2470 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2471 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2472}
2473
// NOTE(review): the signature line (return type, function name and the
// Builder / CI parameters used below) is not present in this listing.
//
// Re-emits a masked scalar move: element 0 of the result is element 0 of B
// when bit 0 of Mask is set and element 0 of Src otherwise; all remaining
// elements come from A.
2475 Value* A = CI.getArgOperand(0);
2476 Value* B = CI.getArgOperand(1);
2477 Value* Src = CI.getArgOperand(2);
2478 Value* Mask = CI.getArgOperand(3);
2479
// Only the lowest bit of the (8-bit) mask is tested.
2480 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2481 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2482 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2483 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2484 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2485 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2486}
2487
// NOTE(review): the signature line (return type, function name and the
// Builder / CI parameters used below) is not present in this listing.
//
// Expands the integer bitmask in operand 0 into a vector of i1 with one lane
// per element of the call's result type, then sign-extends it to that type.
2489 Value* Op = CI.getArgOperand(0);
2490 Type* ReturnOp = CI.getType();
2491 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2492 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2493 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2494}
2495
2496// Replace intrinsic with unmasked version and a select.
// NOTE(review): the first line of the signature (return type, function name
// and the Name / Builder parameters used below) is not present in this
// listing.
//
// Chooses an unmasked intrinsic from the part of Name that follows the
// 12-character "avx512.mask." prefix together with the width of the call's
// result type, calls it with every argument of CI except the last two, and
// stores in Rep a select between that result and CI's second-to-last argument
// under the mask in CI's last argument. Returns true on success; returns false
// without touching Rep when the name matches none of the handled prefixes.
2498 CallBase &CI, Value *&Rep) {
2499 Name = Name.substr(12); // Remove avx512.mask.
2500
// Total width of the result vector and width of one element, in bits.
2501 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2502 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2503 Intrinsic::ID IID;
2504 if (Name.starts_with("max.p")) {
2505 if (VecWidth == 128 && EltWidth == 32)
2506 IID = Intrinsic::x86_sse_max_ps;
2507 else if (VecWidth == 128 && EltWidth == 64)
2508 IID = Intrinsic::x86_sse2_max_pd;
2509 else if (VecWidth == 256 && EltWidth == 32)
2510 IID = Intrinsic::x86_avx_max_ps_256;
2511 else if (VecWidth == 256 && EltWidth == 64)
2512 IID = Intrinsic::x86_avx_max_pd_256;
2513 else
2514 llvm_unreachable("Unexpected intrinsic");
2515 } else if (Name.starts_with("min.p")) {
2516 if (VecWidth == 128 && EltWidth == 32)
2517 IID = Intrinsic::x86_sse_min_ps;
2518 else if (VecWidth == 128 && EltWidth == 64)
2519 IID = Intrinsic::x86_sse2_min_pd;
2520 else if (VecWidth == 256 && EltWidth == 32)
2521 IID = Intrinsic::x86_avx_min_ps_256;
2522 else if (VecWidth == 256 && EltWidth == 64)
2523 IID = Intrinsic::x86_avx_min_pd_256;
2524 else
2525 llvm_unreachable("Unexpected intrinsic");
2526 } else if (Name.starts_with("pshuf.b.")) {
2527 if (VecWidth == 128)
2528 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2529 else if (VecWidth == 256)
2530 IID = Intrinsic::x86_avx2_pshuf_b;
2531 else if (VecWidth == 512)
2532 IID = Intrinsic::x86_avx512_pshuf_b_512;
2533 else
2534 llvm_unreachable("Unexpected intrinsic");
2535 } else if (Name.starts_with("pmul.hr.sw.")) {
2536 if (VecWidth == 128)
2537 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2538 else if (VecWidth == 256)
2539 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2540 else if (VecWidth == 512)
2541 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2542 else
2543 llvm_unreachable("Unexpected intrinsic");
2544 } else if (Name.starts_with("pmulh.w.")) {
2545 if (VecWidth == 128)
2546 IID = Intrinsic::x86_sse2_pmulh_w;
2547 else if (VecWidth == 256)
2548 IID = Intrinsic::x86_avx2_pmulh_w;
2549 else if (VecWidth == 512)
2550 IID = Intrinsic::x86_avx512_pmulh_w_512;
2551 else
2552 llvm_unreachable("Unexpected intrinsic");
2553 } else if (Name.starts_with("pmulhu.w.")) {
2554 if (VecWidth == 128)
2555 IID = Intrinsic::x86_sse2_pmulhu_w;
2556 else if (VecWidth == 256)
2557 IID = Intrinsic::x86_avx2_pmulhu_w;
2558 else if (VecWidth == 512)
2559 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2560 else
2561 llvm_unreachable("Unexpected intrinsic");
2562 } else if (Name.starts_with("pmaddw.d.")) {
2563 if (VecWidth == 128)
2564 IID = Intrinsic::x86_sse2_pmadd_wd;
2565 else if (VecWidth == 256)
2566 IID = Intrinsic::x86_avx2_pmadd_wd;
2567 else if (VecWidth == 512)
2568 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2569 else
2570 llvm_unreachable("Unexpected intrinsic");
2571 } else if (Name.starts_with("pmaddubs.w.")) {
2572 if (VecWidth == 128)
2573 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2574 else if (VecWidth == 256)
2575 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2576 else if (VecWidth == 512)
2577 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2578 else
2579 llvm_unreachable("Unexpected intrinsic");
2580 } else if (Name.starts_with("packsswb.")) {
2581 if (VecWidth == 128)
2582 IID = Intrinsic::x86_sse2_packsswb_128;
2583 else if (VecWidth == 256)
2584 IID = Intrinsic::x86_avx2_packsswb;
2585 else if (VecWidth == 512)
2586 IID = Intrinsic::x86_avx512_packsswb_512;
2587 else
2588 llvm_unreachable("Unexpected intrinsic");
2589 } else if (Name.starts_with("packssdw.")) {
2590 if (VecWidth == 128)
2591 IID = Intrinsic::x86_sse2_packssdw_128;
2592 else if (VecWidth == 256)
2593 IID = Intrinsic::x86_avx2_packssdw;
2594 else if (VecWidth == 512)
2595 IID = Intrinsic::x86_avx512_packssdw_512;
2596 else
2597 llvm_unreachable("Unexpected intrinsic");
2598 } else if (Name.starts_with("packuswb.")) {
2599 if (VecWidth == 128)
2600 IID = Intrinsic::x86_sse2_packuswb_128;
2601 else if (VecWidth == 256)
2602 IID = Intrinsic::x86_avx2_packuswb;
2603 else if (VecWidth == 512)
2604 IID = Intrinsic::x86_avx512_packuswb_512;
2605 else
2606 llvm_unreachable("Unexpected intrinsic");
2607 } else if (Name.starts_with("packusdw.")) {
2608 if (VecWidth == 128)
2609 IID = Intrinsic::x86_sse41_packusdw;
2610 else if (VecWidth == 256)
2611 IID = Intrinsic::x86_avx2_packusdw;
2612 else if (VecWidth == 512)
2613 IID = Intrinsic::x86_avx512_packusdw_512;
2614 else
2615 llvm_unreachable("Unexpected intrinsic");
2616 } else if (Name.starts_with("vpermilvar.")) {
2617 if (VecWidth == 128 && EltWidth == 32)
2618 IID = Intrinsic::x86_avx_vpermilvar_ps;
2619 else if (VecWidth == 128 && EltWidth == 64)
2620 IID = Intrinsic::x86_avx_vpermilvar_pd;
2621 else if (VecWidth == 256 && EltWidth == 32)
2622 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2623 else if (VecWidth == 256 && EltWidth == 64)
2624 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2625 else if (VecWidth == 512 && EltWidth == 32)
2626 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2627 else if (VecWidth == 512 && EltWidth == 64)
2628 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2629 else
2630 llvm_unreachable("Unexpected intrinsic");
// The conversion names below are matched exactly, width suffix included.
2631 } else if (Name == "cvtpd2dq.256") {
2632 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2633 } else if (Name == "cvtpd2ps.256") {
2634 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2635 } else if (Name == "cvttpd2dq.256") {
2636 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2637 } else if (Name == "cvttps2dq.128") {
2638 IID = Intrinsic::x86_sse2_cvttps2dq;
2639 } else if (Name == "cvttps2dq.256") {
2640 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2641 } else if (Name.starts_with("permvar.")) {
// 32/64-bit element variants additionally split on float vs. integer.
2642 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2643 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2644 IID = Intrinsic::x86_avx2_permps;
2645 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2646 IID = Intrinsic::x86_avx2_permd;
2647 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2648 IID = Intrinsic::x86_avx512_permvar_df_256;
2649 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2650 IID = Intrinsic::x86_avx512_permvar_di_256;
2651 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2652 IID = Intrinsic::x86_avx512_permvar_sf_512;
2653 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2654 IID = Intrinsic::x86_avx512_permvar_si_512;
2655 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2656 IID = Intrinsic::x86_avx512_permvar_df_512;
2657 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2658 IID = Intrinsic::x86_avx512_permvar_di_512;
2659 else if (VecWidth == 128 && EltWidth == 16)
2660 IID = Intrinsic::x86_avx512_permvar_hi_128;
2661 else if (VecWidth == 256 && EltWidth == 16)
2662 IID = Intrinsic::x86_avx512_permvar_hi_256;
2663 else if (VecWidth == 512 && EltWidth == 16)
2664 IID = Intrinsic::x86_avx512_permvar_hi_512;
2665 else if (VecWidth == 128 && EltWidth == 8)
2666 IID = Intrinsic::x86_avx512_permvar_qi_128;
2667 else if (VecWidth == 256 && EltWidth == 8)
2668 IID = Intrinsic::x86_avx512_permvar_qi_256;
2669 else if (VecWidth == 512 && EltWidth == 8)
2670 IID = Intrinsic::x86_avx512_permvar_qi_512;
2671 else
2672 llvm_unreachable("Unexpected intrinsic");
2673 } else if (Name.starts_with("dbpsadbw.")) {
2674 if (VecWidth == 128)
2675 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2676 else if (VecWidth == 256)
2677 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2678 else if (VecWidth == 512)
2679 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2680 else
2681 llvm_unreachable("Unexpected intrinsic");
2682 } else if (Name.starts_with("pmultishift.qb.")) {
2683 if (VecWidth == 128)
2684 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2685 else if (VecWidth == 256)
2686 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2687 else if (VecWidth == 512)
2688 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2689 else
2690 llvm_unreachable("Unexpected intrinsic");
2691 } else if (Name.starts_with("conflict.")) {
// Name[9] is the character right after "conflict." ('d' or 'q').
2692 if (Name[9] == 'd' && VecWidth == 128)
2693 IID = Intrinsic::x86_avx512_conflict_d_128;
2694 else if (Name[9] == 'd' && VecWidth == 256)
2695 IID = Intrinsic::x86_avx512_conflict_d_256;
2696 else if (Name[9] == 'd' && VecWidth == 512)
2697 IID = Intrinsic::x86_avx512_conflict_d_512;
2698 else if (Name[9] == 'q' && VecWidth == 128)
2699 IID = Intrinsic::x86_avx512_conflict_q_128;
2700 else if (Name[9] == 'q' && VecWidth == 256)
2701 IID = Intrinsic::x86_avx512_conflict_q_256;
2702 else if (Name[9] == 'q' && VecWidth == 512)
2703 IID = Intrinsic::x86_avx512_conflict_q_512;
2704 else
2705 llvm_unreachable("Unexpected intrinsic");
2706 } else if (Name.starts_with("pavg.")) {
// Name[5] is the character right after "pavg." ('b' or 'w').
2707 if (Name[5] == 'b' && VecWidth == 128)
2708 IID = Intrinsic::x86_sse2_pavg_b;
2709 else if (Name[5] == 'b' && VecWidth == 256)
2710 IID = Intrinsic::x86_avx2_pavg_b;
2711 else if (Name[5] == 'b' && VecWidth == 512)
2712 IID = Intrinsic::x86_avx512_pavg_b_512;
2713 else if (Name[5] == 'w' && VecWidth == 128)
2714 IID = Intrinsic::x86_sse2_pavg_w;
2715 else if (Name[5] == 'w' && VecWidth == 256)
2716 IID = Intrinsic::x86_avx2_pavg_w;
2717 else if (Name[5] == 'w' && VecWidth == 512)
2718 IID = Intrinsic::x86_avx512_pavg_w_512;
2719 else
2720 llvm_unreachable("Unexpected intrinsic");
2721 } else
2722 return false;
2723
// Drop the trailing passthru and mask operands; the unmasked intrinsic is
// called with the remaining leading operands only.
2724 SmallVector<Value *, 4> Args(CI.args());
2725 Args.pop_back();
2726 Args.pop_back();
2727 Rep = Builder.CreateIntrinsic(IID, Args);
2728 unsigned NumArgs = CI.arg_size();
// Last argument is the mask, the one before it the passthru value.
2729 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2730 CI.getArgOperand(NumArgs - 2));
2731 return true;
2732}
2733
/// Upgrade the comment in a call to inline asm that represents an objc
/// retain release marker.
///
/// The string is rewritten only when all three conditions hold:
///   * it begins with "mov\tfp",
///   * it contains "objc_retainAutoreleaseReturnValue", and
///   * it contains "# marker".
/// In that case the single '#' character that starts the first "# marker"
/// occurrence is replaced with ';'. Any other string is left untouched.
///
/// \param AsmStr Inline-asm string to rewrite in place. It is dereferenced
///               unconditionally, so it must not be null.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // rfind(Prefix, 0) can only match at offset 0, so a string that does not
  // start with the prefix is rejected without scanning the rest of it
  // (find(Prefix) == 0 would walk the whole string first).
  if (AsmStr->rfind("mov\tfp", 0) != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t Pos = AsmStr->find("# marker");
  if (Pos != std::string::npos)
    AsmStr->replace(Pos, 1, ";");
}
2744
2746 Function *F, IRBuilder<> &Builder) {
// Builds, through Builder, a replacement value for the call CI. The branch is
// selected by matching Name against the string literals below; Rep carries the
// result and stays nullptr when the final fallback branch does not apply.
// NOTE(review): the first line of this signature and several interior lines
// are absent from this listing (see the gaps in the embedded numbering).
// Name and CI are presumably parameters declared on that missing line --
// confirm against the complete file.
2747 Value *Rep = nullptr;
2748
2749 if (Name == "abs.i" || Name == "abs.ll") {
// Intrinsic::abs overloaded on the operand's type; the second operand passed
// is the constant true.
2750 Value *Arg = CI->getArgOperand(0);
2751 Rep = Builder.CreateIntrinsic(Intrinsic::abs, {Arg->getType()},
2752 {Arg, Builder.getTrue()},
2753 /*FMFSource=*/nullptr, "abs");
2754 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
// Work in bfloat (scalar for "abs.bf16", 2-element vector otherwise): bitcast
// the operand in, apply nvvm_fabs, then bitcast back to the call's own type.
2755 Type *Ty = (Name == "abs.bf16")
2756 ? Builder.getBFloatTy()
2757 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2758 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2759 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2760 Rep = Builder.CreateBitCast(Abs, CI->getType());
2761 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
// Only "fabs.ftz.f" selects the ftz intrinsic; "fabs.f" and "fabs.d" share
// nvvm_fabs.
2762 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2763 : Intrinsic::nvvm_fabs;
2764 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2765 } else if (Name.consume_front("ex2.approx.")) {
2766 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
// consume_front has removed "ex2.approx." from Name, so the test below looks
// at the remaining suffix only.
2767 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2768 : Intrinsic::nvvm_ex2_approx;
2769 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2770 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2771 Name.starts_with("atomic.load.add.f64.p")) {
2772 Value *Ptr = CI->getArgOperand(0);
2773 Value *Val = CI->getArgOperand(1);
2774 Rep = Builder.CreateAtomicRMW(
// NOTE(review): the leading arguments of this call are on a line that is
// absent from this listing.
2776 CI->getContext().getOrInsertSyncScopeID("device"));
2777 // The default scope for atomic.load.* intrinsics is device
2778 // (= gpu scope in ptx), but the default LLVM atomic scope is
2779 // "system"
2780 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2781 Name.starts_with("atomic.load.dec.32.p")) {
2782 Value *Ptr = CI->getArgOperand(0);
2783 Value *Val = CI->getArgOperand(1);
// NOTE(review): the alternative arm of this conditional and the leading
// arguments of the call below are on lines absent from this listing.
2784 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2786 Rep = Builder.CreateAtomicRMW(
2788 CI->getContext().getOrInsertSyncScopeID("device"));
2789 // See comment above.
2790 } else if (Name.starts_with("atomic.") && Name.contains(".gen.")) {
2791 // nvvm.atomic.{op}.gen.{i,f}.{cta,sys} -> atomicrmw / cmpxchg.
// Op is Name with the leading "atomic." removed.
2792 StringRef Op = Name.substr(StringRef("atomic.").size());
2793 Value *Ptr = CI->getArgOperand(0);
2794 Value *Val = CI->getArgOperand(1);
// NOTE(review): the start of this statement is missing from the listing; the
// visible part picks the string "block" when Op contains ".cta." and the
// empty string otherwise.
2796 Op.contains(".cta.") ? "block" : "");
2797 if (Op.starts_with("cas.")) {
// Compare-and-swap form: operand 2 is the new value, and element 0 of the
// cmpxchg result is used as the replacement.
2798 Value *New = CI->getArgOperand(2);
2799 Value *Pair = Builder.CreateAtomicCmpXchg(
2800 Ptr, Val, New, MaybeAlign(), AtomicOrdering::Monotonic,
2802 Rep = Builder.CreateExtractValue(Pair, 0);
2803 } else {
2804 // Note we don't upgrade anything to AtomicRMWInst::UMin/UMax. This is
2805 // because we were actually missing those intrinsics!
// NOTE(review): most of the prefix-to-BinOp table below is on lines absent
// from this listing; only the two "add.gen.*" entries are visible.
2806 AtomicRMWInst::BinOp BinOp =
2808 .StartsWith("add.gen.f", AtomicRMWInst::FAdd)
2809 .StartsWith("add.gen.i", AtomicRMWInst::Add)
2820 "unexpected nvvm scoped atomic intrinsic");
2821 Rep = Builder.CreateAtomicRMW(BinOp, Ptr, Val, MaybeAlign(),
2823 }
2824 } else if (Name == "clz.ll") {
2825 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2826 Value *Arg = CI->getArgOperand(0);
2827 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2828 {Arg, Builder.getFalse()},
2829 /*FMFSource=*/nullptr, "ctlz");
2830 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2831 } else if (Name == "popc.ll") {
2832 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2833 // i64.
2834 Value *Arg = CI->getArgOperand(0);
2835 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2836 Arg, /*FMFSource=*/nullptr, "ctpop");
2837 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2838 } else if (Name == "h2f") {
// Reinterpret the operand as half, then extend it to float.
2839 Value *Cast =
2840 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
2841 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2842 } else if (Name.consume_front("bitcast.") &&
2843 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2844 Name == "d2ll")) {
// Plain bitcast to the call's result type. Name has already been shortened by
// consume_front("bitcast.") when the suffix comparison above runs.
2845 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2846 } else if (Name == "rotate.b32") {
// fshl with the same value supplied as both data operands.
2847 Value *Arg = CI->getOperand(0);
2848 Value *ShiftAmt = CI->getOperand(1);
2849 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2850 {Arg, Arg, ShiftAmt});
2851 } else if (Name == "rotate.b64") {
// i64 version of the above; the shift amount is zero-extended to i64 first.
2852 Type *Int64Ty = Builder.getInt64Ty();
2853 Value *Arg = CI->getOperand(0);
2854 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2855 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2856 {Arg, Arg, ZExtShiftAmt});
2857 } else if (Name == "rotate.right.b64") {
// Same shape as "rotate.b64" but using fshr.
2858 Type *Int64Ty = Builder.getInt64Ty();
2859 Value *Arg = CI->getOperand(0);
2860 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2861 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2862 {Arg, Arg, ZExtShiftAmt});
2863 } else if (Name == "swap.lo.hi.b64") {
// fshl by the constant 32 with both data operands equal to Arg.
2864 Type *Int64Ty = Builder.getInt64Ty();
2865 Value *Arg = CI->getOperand(0);
2866 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2867 {Arg, Arg, Builder.getInt64(32)});
2868 } else if ((Name.consume_front("ptr.gen.to.") &&
2869 consumeNVVMPtrAddrSpace(Name)) ||
2870 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2871 Name.starts_with(".to.gen"))) {
// Both name shapes are emitted as an addrspacecast to the call's result type.
2872 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2873 } else if (Name.consume_front("ldg.global")) {
// Operand 1 is taken as a ConstantInt and provides the alignment of the load.
2874 Value *Ptr = CI->getArgOperand(0);
2875 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2876 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2877 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2878 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
// Tag the load with empty invariant.load metadata.
2879 MDNode *MD = MDNode::get(Builder.getContext(), {});
2880 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
// This branch returns the load directly instead of going through Rep.
2881 return LD;
2882 } else if (Name == "tanh.approx.f32") {
2883 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2884 FastMathFlags FMF;
2885 FMF.setApproxFunc();
2886 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2887 FMF);
2888 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
// "barrier0" (the only name here ending in '0') uses a constant 0 operand;
// the other two names forward operand 0 of the call.
2889 Value *Arg =
2890 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2891 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2892 {}, {Arg});
2893 } else if (Name == "barrier") {
2894 Rep = Builder.CreateIntrinsic(
2895 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2896 {CI->getArgOperand(0), CI->getArgOperand(1)});
2897 } else if (Name == "barrier.sync") {
2898 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2899 {CI->getArgOperand(0)});
2900 } else if (Name == "barrier.sync.cnt") {
2901 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2902 {CI->getArgOperand(0), CI->getArgOperand(1)});
2903 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2904 Name == "barrier0.or") {
// The operand is converted to a predicate via "!= 0"; the new intrinsic is
// called with a constant 0 and that predicate, and its result is
// zero-extended to the call's type.
2905 Value *C = CI->getArgOperand(0);
2906 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2907
// NOTE(review): the head of the selector expression below is on a line absent
// from this listing.
2908 Intrinsic::ID IID =
2910 .Case("barrier0.popc",
2911 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2912 .Case("barrier0.and",
2913 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2914 .Case("barrier0.or",
2915 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2916 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2917 Rep = Builder.CreateZExt(Bar, CI->getType());
2918 } else {
// Fallback. NOTE(review): the statement that defines IID, and the declaration
// of Args, are on lines absent from this listing.
// The rewrite is done only when IID names an intrinsic and F's return type is
// not bfloat (as a scalar or as a vector element type).
2920 if (IID != Intrinsic::not_intrinsic &&
2921 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2922 rename(F);
2923 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2925 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2926 Value *Arg = CI->getArgOperand(I);
2927 Type *OldType = Arg->getType();
2928 Type *NewType = NewFn->getArg(I)->getType();
// Integer arguments whose new parameter type is bfloat (or a vector of
// bfloat) are bitcast; every other argument is passed through unchanged.
2929 Args.push_back(
2930 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2931 ? Builder.CreateBitCast(Arg, NewType)
2932 : Arg);
2933 }
2934 Rep = Builder.CreateCall(NewFn, Args);
// If the old declaration returned an integer, bitcast the new result back to
// that type.
2935 if (F->getReturnType()->isIntegerTy())
2936 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2937 }
2938 }
2939
2940 return Rep;
2941}
2942
2944 IRBuilder<> &Builder) {
2945 LLVMContext &C = F->getContext();
2946 Value *Rep = nullptr;
2947
2948 if (Name.starts_with("sse4a.movnt.")) {
2950 Elts.push_back(
2951 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2952 MDNode *Node = MDNode::get(C, Elts);
2953
2954 Value *Arg0 = CI->getArgOperand(0);
2955 Value *Arg1 = CI->getArgOperand(1);
2956
2957 // Nontemporal (unaligned) store of the 0'th element of the float/double
2958 // vector.
2959 Value *Extract =
2960 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2961
2962 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2963 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2964 } else if (Name.starts_with("avx.movnt.") ||
2965 Name.starts_with("avx512.storent.")) {
2967 Elts.push_back(
2968 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2969 MDNode *Node = MDNode::get(C, Elts);
2970
2971 Value *Arg0 = CI->getArgOperand(0);
2972 Value *Arg1 = CI->getArgOperand(1);
2973
2974 StoreInst *SI = Builder.CreateAlignedStore(
2975 Arg1, Arg0,
2977 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2978 } else if (Name == "sse2.storel.dq") {
2979 Value *Arg0 = CI->getArgOperand(0);
2980 Value *Arg1 = CI->getArgOperand(1);
2981
2982 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2983 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2984 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2985 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2986 } else if (Name.starts_with("sse.storeu.") ||
2987 Name.starts_with("sse2.storeu.") ||
2988 Name.starts_with("avx.storeu.")) {
2989 Value *Arg0 = CI->getArgOperand(0);
2990 Value *Arg1 = CI->getArgOperand(1);
2991 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2992 } else if (Name == "avx512.mask.store.ss") {
2993 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2994 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2995 Mask, false);
2996 } else if (Name.starts_with("avx512.mask.store")) {
2997 // "avx512.mask.storeu." or "avx512.mask.store."
2998 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2999 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3000 CI->getArgOperand(2), Aligned);
3001 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
3002 // Upgrade packed integer vector compare intrinsics to compare instructions.
3003 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
3004 bool CmpEq = Name[9] == 'e';
3005 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
3006 CI->getArgOperand(0), CI->getArgOperand(1));
3007 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
3008 } else if (Name.starts_with("avx512.broadcastm")) {
3009 Type *ExtTy = Type::getInt32Ty(C);
3010 if (CI->getOperand(0)->getType()->isIntegerTy(8))
3011 ExtTy = Type::getInt64Ty(C);
3012 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
3013 ExtTy->getPrimitiveSizeInBits();
3014 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
3015 Rep = Builder.CreateVectorSplat(NumElts, Rep);
3016 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
3017 Value *Vec = CI->getArgOperand(0);
3018 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
3019 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
3020 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
3021 } else if (Name.starts_with("avx.sqrt.p") ||
3022 Name.starts_with("sse2.sqrt.p") ||
3023 Name.starts_with("sse.sqrt.p")) {
3024 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
3025 {CI->getArgOperand(0)});
3026 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
3027 if (CI->arg_size() == 4 &&
3028 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3029 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3030 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
3031 : Intrinsic::x86_avx512_sqrt_pd_512;
3032
3033 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
3034 Rep = Builder.CreateIntrinsic(IID, Args);
3035 } else {
3036 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
3037 {CI->getArgOperand(0)});
3038 }
3039 Rep =
3040 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3041 } else if (Name.starts_with("avx512.ptestm") ||
3042 Name.starts_with("avx512.ptestnm")) {
3043 Value *Op0 = CI->getArgOperand(0);
3044 Value *Op1 = CI->getArgOperand(1);
3045 Value *Mask = CI->getArgOperand(2);
3046 Rep = Builder.CreateAnd(Op0, Op1);
3047 llvm::Type *Ty = Op0->getType();
3049 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
3052 Rep = Builder.CreateICmp(Pred, Rep, Zero);
3053 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
3054 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
3055 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
3056 ->getNumElements();
3057 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
3058 Rep =
3059 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3060 } else if (Name.starts_with("avx512.kunpck")) {
3061 unsigned NumElts = CI->getType()->getScalarSizeInBits();
3062 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
3063 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
3064 int Indices[64];
3065 for (unsigned i = 0; i != NumElts; ++i)
3066 Indices[i] = i;
3067
3068 // First extract half of each vector. This gives better codegen than
3069 // doing it in a single shuffle.
3070 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
3071 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
3072 // Concat the vectors.
3073 // NOTE: Operands have to be swapped to match intrinsic definition.
3074 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
3075 Rep = Builder.CreateBitCast(Rep, CI->getType());
3076 } else if (Name == "avx512.kand.w") {
3077 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3078 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3079 Rep = Builder.CreateAnd(LHS, RHS);
3080 Rep = Builder.CreateBitCast(Rep, CI->getType());
3081 } else if (Name == "avx512.kandn.w") {
3082 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3083 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3084 LHS = Builder.CreateNot(LHS);
3085 Rep = Builder.CreateAnd(LHS, RHS);
3086 Rep = Builder.CreateBitCast(Rep, CI->getType());
3087 } else if (Name == "avx512.kor.w") {
3088 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3089 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3090 Rep = Builder.CreateOr(LHS, RHS);
3091 Rep = Builder.CreateBitCast(Rep, CI->getType());
3092 } else if (Name == "avx512.kxor.w") {
3093 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3094 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3095 Rep = Builder.CreateXor(LHS, RHS);
3096 Rep = Builder.CreateBitCast(Rep, CI->getType());
3097 } else if (Name == "avx512.kxnor.w") {
3098 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3099 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3100 LHS = Builder.CreateNot(LHS);
3101 Rep = Builder.CreateXor(LHS, RHS);
3102 Rep = Builder.CreateBitCast(Rep, CI->getType());
3103 } else if (Name == "avx512.knot.w") {
3104 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3105 Rep = Builder.CreateNot(Rep);
3106 Rep = Builder.CreateBitCast(Rep, CI->getType());
3107 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3108 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3109 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3110 Rep = Builder.CreateOr(LHS, RHS);
3111 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3112 Value *C;
3113 if (Name[14] == 'c')
3114 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
3115 else
3116 C = ConstantInt::getNullValue(Builder.getInt16Ty());
3117 Rep = Builder.CreateICmpEQ(Rep, C);
3118 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3119 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3120 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3121 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3122 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3123 Type *I32Ty = Type::getInt32Ty(C);
3124 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
3125 ConstantInt::get(I32Ty, 0));
3126 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
3127 ConstantInt::get(I32Ty, 0));
3128 Value *EltOp;
3129 if (Name.contains(".add."))
3130 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3131 else if (Name.contains(".sub."))
3132 EltOp = Builder.CreateFSub(Elt0, Elt1);
3133 else if (Name.contains(".mul."))
3134 EltOp = Builder.CreateFMul(Elt0, Elt1);
3135 else
3136 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3137 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3138 ConstantInt::get(I32Ty, 0));
3139 } else if (Name.starts_with("avx512.mask.pcmp")) {
3140 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3141 bool CmpEq = Name[16] == 'e';
3142 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3143 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3144 Type *OpTy = CI->getArgOperand(0)->getType();
3145 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3146 Intrinsic::ID IID;
3147 switch (VecWidth) {
3148 default:
3149 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3150 break;
3151 case 128:
3152 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3153 break;
3154 case 256:
3155 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3156 break;
3157 case 512:
3158 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3159 break;
3160 }
3161
3162 Rep =
3163 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3164 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3165 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3166 Type *OpTy = CI->getArgOperand(0)->getType();
3167 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3168 unsigned EltWidth = OpTy->getScalarSizeInBits();
3169 Intrinsic::ID IID;
3170 if (VecWidth == 128 && EltWidth == 32)
3171 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3172 else if (VecWidth == 256 && EltWidth == 32)
3173 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3174 else if (VecWidth == 512 && EltWidth == 32)
3175 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3176 else if (VecWidth == 128 && EltWidth == 64)
3177 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3178 else if (VecWidth == 256 && EltWidth == 64)
3179 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3180 else if (VecWidth == 512 && EltWidth == 64)
3181 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3182 else
3183 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3184
3185 Rep =
3186 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3187 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3188 } else if (Name.starts_with("avx512.cmp.p")) {
3189 SmallVector<Value *, 4> Args(CI->args());
3190 Type *OpTy = Args[0]->getType();
3191 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3192 unsigned EltWidth = OpTy->getScalarSizeInBits();
3193 Intrinsic::ID IID;
3194 if (VecWidth == 128 && EltWidth == 32)
3195 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3196 else if (VecWidth == 256 && EltWidth == 32)
3197 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3198 else if (VecWidth == 512 && EltWidth == 32)
3199 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3200 else if (VecWidth == 128 && EltWidth == 64)
3201 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3202 else if (VecWidth == 256 && EltWidth == 64)
3203 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3204 else if (VecWidth == 512 && EltWidth == 64)
3205 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3206 else
3207 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3208
3210 if (VecWidth == 512)
3211 std::swap(Mask, Args.back());
3212 Args.push_back(Mask);
3213
3214 Rep = Builder.CreateIntrinsic(IID, Args);
3215 } else if (Name.starts_with("avx512.mask.cmp.")) {
3216 // Integer compare intrinsics.
3217 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3218 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3219 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3220 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3221 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3222 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3223 Name.starts_with("avx512.cvtw2mask.") ||
3224 Name.starts_with("avx512.cvtd2mask.") ||
3225 Name.starts_with("avx512.cvtq2mask.")) {
3226 Value *Op = CI->getArgOperand(0);
3227 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3228 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3229 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3230 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3231 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3232 Name.starts_with("avx512.mask.pabs")) {
3233 Rep = upgradeAbs(Builder, *CI);
3234 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3235 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3236 Name.starts_with("avx512.mask.pmaxs")) {
3237 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3238 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3239 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3240 Name.starts_with("avx512.mask.pmaxu")) {
3241 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3242 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3243 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3244 Name.starts_with("avx512.mask.pmins")) {
3245 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3246 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3247 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3248 Name.starts_with("avx512.mask.pminu")) {
3249 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3250 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3251 Name == "avx512.pmulu.dq.512" ||
3252 Name.starts_with("avx512.mask.pmulu.dq.")) {
3253 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3254 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3255 Name == "avx512.pmul.dq.512" ||
3256 Name.starts_with("avx512.mask.pmul.dq.")) {
3257 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3258 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3259 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3260 Rep =
3261 Builder.CreateSIToFP(CI->getArgOperand(1),
3262 cast<VectorType>(CI->getType())->getElementType());
3263 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3264 } else if (Name == "avx512.cvtusi2sd") {
3265 Rep =
3266 Builder.CreateUIToFP(CI->getArgOperand(1),
3267 cast<VectorType>(CI->getType())->getElementType());
3268 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3269 } else if (Name == "sse2.cvtss2sd") {
3270 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3271 Rep = Builder.CreateFPExt(
3272 Rep, cast<VectorType>(CI->getType())->getElementType());
3273 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3274 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3275 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3276 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3277 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3278 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3279 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3280 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3281 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3282 Name == "avx512.mask.cvtqq2ps.256" ||
3283 Name == "avx512.mask.cvtqq2ps.512" ||
3284 Name == "avx512.mask.cvtuqq2ps.256" ||
3285 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3286 Name == "avx.cvt.ps2.pd.256" ||
3287 Name == "avx512.mask.cvtps2pd.128" ||
3288 Name == "avx512.mask.cvtps2pd.256") {
3289 auto *DstTy = cast<FixedVectorType>(CI->getType());
3290 Rep = CI->getArgOperand(0);
3291 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3292
3293 unsigned NumDstElts = DstTy->getNumElements();
3294 if (NumDstElts < SrcTy->getNumElements()) {
3295 assert(NumDstElts == 2 && "Unexpected vector size");
3296 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3297 }
3298
3299 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3300 bool IsUnsigned = Name.contains("cvtu");
3301 if (IsPS2PD)
3302 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3303 else if (CI->arg_size() == 4 &&
3304 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3305 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3306 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3307 : Intrinsic::x86_avx512_sitofp_round;
3308 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3309 {Rep, CI->getArgOperand(3)});
3310 } else {
3311 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3312 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3313 }
3314
3315 if (CI->arg_size() >= 3)
3316 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3317 CI->getArgOperand(1));
3318 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3319 Name.starts_with("vcvtph2ps.")) {
3320 auto *DstTy = cast<FixedVectorType>(CI->getType());
3321 Rep = CI->getArgOperand(0);
3322 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3323 unsigned NumDstElts = DstTy->getNumElements();
3324 if (NumDstElts != SrcTy->getNumElements()) {
3325 assert(NumDstElts == 4 && "Unexpected vector size");
3326 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3327 }
3328 Rep = Builder.CreateBitCast(
3329 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3330 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3331 if (CI->arg_size() >= 3)
3332 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3333 CI->getArgOperand(1));
3334 } else if (Name.starts_with("avx512.mask.load")) {
3335 // "avx512.mask.loadu." or "avx512.mask.load."
3336 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3337 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3338 CI->getArgOperand(2), Aligned);
3339 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3340 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3341 auto *PtrTy = CI->getOperand(0)->getType();
3342 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3343 ResultTy->getNumElements());
3344 Rep = Builder.CreateIntrinsic(
3345 Intrinsic::masked_expandload, {ResultTy, PtrTy},
3346 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3347 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3348 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3349 auto *PtrTy = CI->getArgOperand(0)->getType();
3350 Value *MaskVec =
3351 getX86MaskVec(Builder, CI->getArgOperand(2),
3352 cast<FixedVectorType>(ResultTy)->getNumElements());
3353 Rep = Builder.CreateIntrinsic(
3354 Intrinsic::masked_compressstore, {ResultTy, PtrTy},
3355 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3356 } else if (Name.starts_with("avx512.mask.compress.") ||
3357 Name.starts_with("avx512.mask.expand.")) {
3358 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3359
3360 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3361 ResultTy->getNumElements());
3362
3363 bool IsCompress = Name[12] == 'c';
3364 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3365 : Intrinsic::x86_avx512_mask_expand;
3366 Rep = Builder.CreateIntrinsic(
3367 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3368 } else if (Name.starts_with("xop.vpcom")) {
3369 bool IsSigned;
3370 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3371 Name.ends_with("uq"))
3372 IsSigned = false;
3373 else if (Name.ends_with("b") || Name.ends_with("w") ||
3374 Name.ends_with("d") || Name.ends_with("q"))
3375 IsSigned = true;
3376 else
3377 reportFatalUsageErrorWithCI("Intrinsic has unknown suffix", CI);
3378
3379 unsigned Imm;
3380 if (CI->arg_size() == 3) {
3381 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3382 } else {
3383 Name = Name.substr(9); // strip off "xop.vpcom"
3384 if (Name.starts_with("lt"))
3385 Imm = 0;
3386 else if (Name.starts_with("le"))
3387 Imm = 1;
3388 else if (Name.starts_with("gt"))
3389 Imm = 2;
3390 else if (Name.starts_with("ge"))
3391 Imm = 3;
3392 else if (Name.starts_with("eq"))
3393 Imm = 4;
3394 else if (Name.starts_with("ne"))
3395 Imm = 5;
3396 else if (Name.starts_with("false"))
3397 Imm = 6;
3398 else if (Name.starts_with("true"))
3399 Imm = 7;
3400 else
3401 llvm_unreachable("Unknown condition");
3402 }
3403
3404 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3405 } else if (Name.starts_with("xop.vpcmov")) {
3406 Value *Sel = CI->getArgOperand(2);
3407 Value *NotSel = Builder.CreateNot(Sel);
3408 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3409 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3410 Rep = Builder.CreateOr(Sel0, Sel1);
3411 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3412 Name.starts_with("avx512.mask.prol")) {
3413 Rep = upgradeX86Rotate(Builder, *CI, false);
3414 } else if (Name.starts_with("avx512.pror") ||
3415 Name.starts_with("avx512.mask.pror")) {
3416 Rep = upgradeX86Rotate(Builder, *CI, true);
3417 } else if (Name.starts_with("avx512.vpshld.") ||
3418 Name.starts_with("avx512.mask.vpshld") ||
3419 Name.starts_with("avx512.maskz.vpshld")) {
3420 bool ZeroMask = Name[11] == 'z';
3421 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3422 } else if (Name.starts_with("avx512.vpshrd.") ||
3423 Name.starts_with("avx512.mask.vpshrd") ||
3424 Name.starts_with("avx512.maskz.vpshrd")) {
3425 bool ZeroMask = Name[11] == 'z';
3426 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3427 } else if (Name == "sse42.crc32.64.8") {
3428 Value *Trunc0 =
3429 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3430 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3431 {Trunc0, CI->getArgOperand(1)});
3432 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3433 } else if (Name.starts_with("avx.vbroadcast.s") ||
3434 Name.starts_with("avx512.vbroadcast.s")) {
3435 // Replace broadcasts with a series of insertelements.
3436 auto *VecTy = cast<FixedVectorType>(CI->getType());
3437 Type *EltTy = VecTy->getElementType();
3438 unsigned EltNum = VecTy->getNumElements();
3439 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3440 Type *I32Ty = Type::getInt32Ty(C);
3441 Rep = PoisonValue::get(VecTy);
3442 for (unsigned I = 0; I < EltNum; ++I)
3443 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3444 } else if (Name.starts_with("sse41.pmovsx") ||
3445 Name.starts_with("sse41.pmovzx") ||
3446 Name.starts_with("avx2.pmovsx") ||
3447 Name.starts_with("avx2.pmovzx") ||
3448 Name.starts_with("avx512.mask.pmovsx") ||
3449 Name.starts_with("avx512.mask.pmovzx")) {
3450 auto *DstTy = cast<FixedVectorType>(CI->getType());
3451 unsigned NumDstElts = DstTy->getNumElements();
3452
3453 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3454 SmallVector<int, 8> ShuffleMask(NumDstElts);
3455 for (unsigned i = 0; i != NumDstElts; ++i)
3456 ShuffleMask[i] = i;
3457
3458 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3459
3460 bool DoSext = Name.contains("pmovsx");
3461 Rep =
3462 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3463 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3464 if (CI->arg_size() == 3)
3465 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3466 CI->getArgOperand(1));
3467 } else if (Name == "avx512.mask.pmov.qd.256" ||
3468 Name == "avx512.mask.pmov.qd.512" ||
3469 Name == "avx512.mask.pmov.wb.256" ||
3470 Name == "avx512.mask.pmov.wb.512") {
3471 Type *Ty = CI->getArgOperand(1)->getType();
3472 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3473 Rep =
3474 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3475 } else if (Name.starts_with("avx.vbroadcastf128") ||
3476 Name == "avx2.vbroadcasti128") {
3477 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3478 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3479 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3480 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3481 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3482 if (NumSrcElts == 2)
3483 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3484 else
3485 Rep = Builder.CreateShuffleVector(Load,
3486 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3487 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3488 Name.starts_with("avx512.mask.shuf.f")) {
3489 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3490 Type *VT = CI->getType();
3491 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3492 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3493 unsigned ControlBitsMask = NumLanes - 1;
3494 unsigned NumControlBits = NumLanes / 2;
3495 SmallVector<int, 8> ShuffleMask(0);
3496
3497 for (unsigned l = 0; l != NumLanes; ++l) {
3498 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3499 // We actually need the other source.
3500 if (l >= NumLanes / 2)
3501 LaneMask += NumLanes;
3502 for (unsigned i = 0; i != NumElementsInLane; ++i)
3503 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3504 }
3505 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3506 CI->getArgOperand(1), ShuffleMask);
3507 Rep =
3508 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3509 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3510 Name.starts_with("avx512.mask.broadcasti")) {
3511 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3512 ->getNumElements();
3513 unsigned NumDstElts =
3514 cast<FixedVectorType>(CI->getType())->getNumElements();
3515
3516 SmallVector<int, 8> ShuffleMask(NumDstElts);
3517 for (unsigned i = 0; i != NumDstElts; ++i)
3518 ShuffleMask[i] = i % NumSrcElts;
3519
3520 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3521 CI->getArgOperand(0), ShuffleMask);
3522 Rep =
3523 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3524 } else if (Name.starts_with("avx2.pbroadcast") ||
3525 Name.starts_with("avx2.vbroadcast") ||
3526 Name.starts_with("avx512.pbroadcast") ||
3527 Name.starts_with("avx512.mask.broadcast.s")) {
3528 // Replace vp?broadcasts with a vector shuffle.
3529 Value *Op = CI->getArgOperand(0);
3530 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3531 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3534 Rep = Builder.CreateShuffleVector(Op, M);
3535
3536 if (CI->arg_size() == 3)
3537 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3538 CI->getArgOperand(1));
3539 } else if (Name.starts_with("sse2.padds.") ||
3540 Name.starts_with("avx2.padds.") ||
3541 Name.starts_with("avx512.padds.") ||
3542 Name.starts_with("avx512.mask.padds.")) {
3543 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3544 } else if (Name.starts_with("sse2.psubs.") ||
3545 Name.starts_with("avx2.psubs.") ||
3546 Name.starts_with("avx512.psubs.") ||
3547 Name.starts_with("avx512.mask.psubs.")) {
3548 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3549 } else if (Name.starts_with("sse2.paddus.") ||
3550 Name.starts_with("avx2.paddus.") ||
3551 Name.starts_with("avx512.mask.paddus.")) {
3552 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3553 } else if (Name.starts_with("sse2.psubus.") ||
3554 Name.starts_with("avx2.psubus.") ||
3555 Name.starts_with("avx512.mask.psubus.")) {
3556 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3557 } else if (Name.starts_with("avx512.mask.palignr.")) {
3558 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3559 CI->getArgOperand(1), CI->getArgOperand(2),
3560 CI->getArgOperand(3), CI->getArgOperand(4),
3561 false);
3562 } else if (Name.starts_with("avx512.mask.valign.")) {
3564 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3565 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3566 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3567 // 128/256-bit shift left specified in bits.
3568 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3569 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3570 Shift / 8); // Shift is in bits.
3571 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3572 // 128/256-bit shift right specified in bits.
3573 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3574 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3575 Shift / 8); // Shift is in bits.
3576 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3577 Name == "avx512.psll.dq.512") {
3578 // 128/256/512-bit shift left specified in bytes.
3579 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3580 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3581 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3582 Name == "avx512.psrl.dq.512") {
3583 // 128/256/512-bit shift right specified in bytes.
3584 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3585 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3586 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3587 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3588 Name.starts_with("avx2.pblendd.")) {
3589 Value *Op0 = CI->getArgOperand(0);
3590 Value *Op1 = CI->getArgOperand(1);
3591 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3592 auto *VecTy = cast<FixedVectorType>(CI->getType());
3593 unsigned NumElts = VecTy->getNumElements();
3594
3595 SmallVector<int, 16> Idxs(NumElts);
3596 for (unsigned i = 0; i != NumElts; ++i)
3597 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3598
3599 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3600 } else if (Name.starts_with("avx.vinsertf128.") ||
3601 Name == "avx2.vinserti128" ||
3602 Name.starts_with("avx512.mask.insert")) {
3603 Value *Op0 = CI->getArgOperand(0);
3604 Value *Op1 = CI->getArgOperand(1);
3605 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3606 unsigned DstNumElts =
3607 cast<FixedVectorType>(CI->getType())->getNumElements();
3608 unsigned SrcNumElts =
3609 cast<FixedVectorType>(Op1->getType())->getNumElements();
3610 unsigned Scale = DstNumElts / SrcNumElts;
3611
3612 // Mask off the high bits of the immediate value; hardware ignores those.
3613 Imm = Imm % Scale;
3614
3615 // Extend the second operand into a vector the size of the destination.
3616 SmallVector<int, 8> Idxs(DstNumElts);
3617 for (unsigned i = 0; i != SrcNumElts; ++i)
3618 Idxs[i] = i;
3619 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3620 Idxs[i] = SrcNumElts;
3621 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3622
3623 // Insert the second operand into the first operand.
3624
3625 // Note that there is no guarantee that instruction lowering will actually
3626 // produce a vinsertf128 instruction for the created shuffles. In
3627 // particular, the 0 immediate case involves no lane changes, so it can
3628 // be handled as a blend.
3629
3630 // Example of shuffle mask for 32-bit elements:
3631 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3632 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3633
3634 // First fill with identity mask.
3635 for (unsigned i = 0; i != DstNumElts; ++i)
3636 Idxs[i] = i;
3637 // Then replace the elements where we need to insert.
3638 for (unsigned i = 0; i != SrcNumElts; ++i)
3639 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3640 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3641
3642 // If the intrinsic has a mask operand, handle that.
3643 if (CI->arg_size() == 5)
3644 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3645 CI->getArgOperand(3));
3646 } else if (Name.starts_with("avx.vextractf128.") ||
3647 Name == "avx2.vextracti128" ||
3648 Name.starts_with("avx512.mask.vextract")) {
3649 Value *Op0 = CI->getArgOperand(0);
3650 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3651 unsigned DstNumElts =
3652 cast<FixedVectorType>(CI->getType())->getNumElements();
3653 unsigned SrcNumElts =
3654 cast<FixedVectorType>(Op0->getType())->getNumElements();
3655 unsigned Scale = SrcNumElts / DstNumElts;
3656
3657 // Mask off the high bits of the immediate value; hardware ignores those.
3658 Imm = Imm % Scale;
3659
3660 // Get indexes for the subvector of the input vector.
3661 SmallVector<int, 8> Idxs(DstNumElts);
3662 for (unsigned i = 0; i != DstNumElts; ++i) {
3663 Idxs[i] = i + (Imm * DstNumElts);
3664 }
3665 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3666
3667 // If the intrinsic has a mask operand, handle that.
3668 if (CI->arg_size() == 4)
3669 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3670 CI->getArgOperand(2));
3671 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3672 Name.starts_with("avx512.mask.perm.di.")) {
3673 Value *Op0 = CI->getArgOperand(0);
3674 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3675 auto *VecTy = cast<FixedVectorType>(CI->getType());
3676 unsigned NumElts = VecTy->getNumElements();
3677
3678 SmallVector<int, 8> Idxs(NumElts);
3679 for (unsigned i = 0; i != NumElts; ++i)
3680 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3681
3682 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3683
3684 if (CI->arg_size() == 4)
3685 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3686 CI->getArgOperand(2));
3687 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3688 // The immediate permute control byte looks like this:
3689 // [1:0] - select 128 bits from sources for low half of destination
3690 // [2] - ignore
3691 // [3] - zero low half of destination
3692 // [5:4] - select 128 bits from sources for high half of destination
3693 // [6] - ignore
3694 // [7] - zero high half of destination
3695
3696 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3697
3698 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3699 unsigned HalfSize = NumElts / 2;
3700 SmallVector<int, 8> ShuffleMask(NumElts);
3701
3702 // Determine which operand(s) are actually in use for this instruction.
3703 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3704 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3705
3706 // If needed, replace operands based on zero mask.
3707 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3708 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3709
3710 // Permute low half of result.
3711 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3712 for (unsigned i = 0; i < HalfSize; ++i)
3713 ShuffleMask[i] = StartIndex + i;
3714
3715 // Permute high half of result.
3716 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3717 for (unsigned i = 0; i < HalfSize; ++i)
3718 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3719
3720 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3721
3722 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3723 Name.starts_with("avx512.mask.vpermil.p") ||
3724 Name.starts_with("avx512.mask.pshuf.d.")) {
3725 Value *Op0 = CI->getArgOperand(0);
3726 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3727 auto *VecTy = cast<FixedVectorType>(CI->getType());
3728 unsigned NumElts = VecTy->getNumElements();
3729 // Calculate the size of each index in the immediate.
3730 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3731 unsigned IdxMask = ((1 << IdxSize) - 1);
3732
3733 SmallVector<int, 8> Idxs(NumElts);
3734 // Lookup the bits for this element, wrapping around the immediate every
3735 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3736 // to offset by the first index of each group.
3737 for (unsigned i = 0; i != NumElts; ++i)
3738 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3739
3740 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3741
3742 if (CI->arg_size() == 4)
3743 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3744 CI->getArgOperand(2));
3745 } else if (Name == "sse2.pshufl.w" ||
3746 Name.starts_with("avx512.mask.pshufl.w.")) {
3747 Value *Op0 = CI->getArgOperand(0);
3748 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3749 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3750
3751 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3752 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3753
3754 SmallVector<int, 16> Idxs(NumElts);
3755 for (unsigned l = 0; l != NumElts; l += 8) {
3756 for (unsigned i = 0; i != 4; ++i)
3757 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3758 for (unsigned i = 4; i != 8; ++i)
3759 Idxs[i + l] = i + l;
3760 }
3761
3762 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3763
3764 if (CI->arg_size() == 4)
3765 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3766 CI->getArgOperand(2));
3767 } else if (Name == "sse2.pshufh.w" ||
3768 Name.starts_with("avx512.mask.pshufh.w.")) {
3769 Value *Op0 = CI->getArgOperand(0);
3770 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3771 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3772
3773 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3774 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3775
3776 SmallVector<int, 16> Idxs(NumElts);
3777 for (unsigned l = 0; l != NumElts; l += 8) {
3778 for (unsigned i = 0; i != 4; ++i)
3779 Idxs[i + l] = i + l;
3780 for (unsigned i = 0; i != 4; ++i)
3781 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3782 }
3783
3784 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3785
3786 if (CI->arg_size() == 4)
3787 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3788 CI->getArgOperand(2));
3789 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3790 Value *Op0 = CI->getArgOperand(0);
3791 Value *Op1 = CI->getArgOperand(1);
3792 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3793 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3794
3795 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3796 unsigned HalfLaneElts = NumLaneElts / 2;
3797
3798 SmallVector<int, 16> Idxs(NumElts);
3799 for (unsigned i = 0; i != NumElts; ++i) {
3800 // Base index is the starting element of the lane.
3801 Idxs[i] = i - (i % NumLaneElts);
3802 // If we are half way through the lane switch to the other source.
3803 if ((i % NumLaneElts) >= HalfLaneElts)
3804 Idxs[i] += NumElts;
3805 // Now select the specific element by adding HalfLaneElts bits from
3806 // the immediate, wrapping around the immediate every 8 bits.
3807 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3808 }
3809
3810 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3811
3812 Rep =
3813 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3814 } else if (Name.starts_with("avx512.mask.movddup") ||
3815 Name.starts_with("avx512.mask.movshdup") ||
3816 Name.starts_with("avx512.mask.movsldup")) {
3817 Value *Op0 = CI->getArgOperand(0);
3818 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3819 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3820
3821 unsigned Offset = 0;
3822 if (Name.starts_with("avx512.mask.movshdup."))
3823 Offset = 1;
3824
3825 SmallVector<int, 16> Idxs(NumElts);
3826 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3827 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3828 Idxs[i + l + 0] = i + l + Offset;
3829 Idxs[i + l + 1] = i + l + Offset;
3830 }
3831
3832 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3833
3834 Rep =
3835 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3836 } else if (Name.starts_with("avx512.mask.punpckl") ||
3837 Name.starts_with("avx512.mask.unpckl.")) {
3838 Value *Op0 = CI->getArgOperand(0);
3839 Value *Op1 = CI->getArgOperand(1);
3840 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3841 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3842
3843 SmallVector<int, 64> Idxs(NumElts);
3844 for (int l = 0; l != NumElts; l += NumLaneElts)
3845 for (int i = 0; i != NumLaneElts; ++i)
3846 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3847
3848 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3849
3850 Rep =
3851 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3852 } else if (Name.starts_with("avx512.mask.punpckh") ||
3853 Name.starts_with("avx512.mask.unpckh.")) {
3854 Value *Op0 = CI->getArgOperand(0);
3855 Value *Op1 = CI->getArgOperand(1);
3856 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3857 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3858
3859 SmallVector<int, 64> Idxs(NumElts);
3860 for (int l = 0; l != NumElts; l += NumLaneElts)
3861 for (int i = 0; i != NumLaneElts; ++i)
3862 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3863
3864 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3865
3866 Rep =
3867 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3868 } else if (Name.starts_with("avx512.mask.and.") ||
3869 Name.starts_with("avx512.mask.pand.")) {
3870 VectorType *FTy = cast<VectorType>(CI->getType());
3872 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3873 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3874 Rep = Builder.CreateBitCast(Rep, FTy);
3875 Rep =
3876 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3877 } else if (Name.starts_with("avx512.mask.andn.") ||
3878 Name.starts_with("avx512.mask.pandn.")) {
3879 VectorType *FTy = cast<VectorType>(CI->getType());
3881 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3882 Rep = Builder.CreateAnd(Rep,
3883 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3884 Rep = Builder.CreateBitCast(Rep, FTy);
3885 Rep =
3886 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3887 } else if (Name.starts_with("avx512.mask.or.") ||
3888 Name.starts_with("avx512.mask.por.")) {
3889 VectorType *FTy = cast<VectorType>(CI->getType());
3891 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3892 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3893 Rep = Builder.CreateBitCast(Rep, FTy);
3894 Rep =
3895 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3896 } else if (Name.starts_with("avx512.mask.xor.") ||
3897 Name.starts_with("avx512.mask.pxor.")) {
3898 VectorType *FTy = cast<VectorType>(CI->getType());
3900 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3901 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3902 Rep = Builder.CreateBitCast(Rep, FTy);
3903 Rep =
3904 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3905 } else if (Name.starts_with("avx512.mask.padd.")) {
3906 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3907 Rep =
3908 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3909 } else if (Name.starts_with("avx512.mask.psub.")) {
3910 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3911 Rep =
3912 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3913 } else if (Name.starts_with("avx512.mask.pmull.")) {
3914 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3915 Rep =
3916 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3917 } else if (Name.starts_with("avx512.mask.add.p")) {
3918 if (Name.ends_with(".512")) {
3919 Intrinsic::ID IID;
3920 if (Name[17] == 's')
3921 IID = Intrinsic::x86_avx512_add_ps_512;
3922 else
3923 IID = Intrinsic::x86_avx512_add_pd_512;
3924
3925 Rep = Builder.CreateIntrinsic(
3926 IID,
3927 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3928 } else {
3929 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3930 }
3931 Rep =
3932 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3933 } else if (Name.starts_with("avx512.mask.div.p")) {
3934 if (Name.ends_with(".512")) {
3935 Intrinsic::ID IID;
3936 if (Name[17] == 's')
3937 IID = Intrinsic::x86_avx512_div_ps_512;
3938 else
3939 IID = Intrinsic::x86_avx512_div_pd_512;
3940
3941 Rep = Builder.CreateIntrinsic(
3942 IID,
3943 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3944 } else {
3945 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3946 }
3947 Rep =
3948 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3949 } else if (Name.starts_with("avx512.mask.mul.p")) {
3950 if (Name.ends_with(".512")) {
3951 Intrinsic::ID IID;
3952 if (Name[17] == 's')
3953 IID = Intrinsic::x86_avx512_mul_ps_512;
3954 else
3955 IID = Intrinsic::x86_avx512_mul_pd_512;
3956
3957 Rep = Builder.CreateIntrinsic(
3958 IID,
3959 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3960 } else {
3961 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3962 }
3963 Rep =
3964 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3965 } else if (Name.starts_with("avx512.mask.sub.p")) {
3966 if (Name.ends_with(".512")) {
3967 Intrinsic::ID IID;
3968 if (Name[17] == 's')
3969 IID = Intrinsic::x86_avx512_sub_ps_512;
3970 else
3971 IID = Intrinsic::x86_avx512_sub_pd_512;
3972
3973 Rep = Builder.CreateIntrinsic(
3974 IID,
3975 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3976 } else {
3977 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3978 }
3979 Rep =
3980 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3981 } else if ((Name.starts_with("avx512.mask.max.p") ||
3982 Name.starts_with("avx512.mask.min.p")) &&
3983 Name.drop_front(18) == ".512") {
3984 bool IsDouble = Name[17] == 'd';
3985 bool IsMin = Name[13] == 'i';
3986 static const Intrinsic::ID MinMaxTbl[2][2] = {
3987 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3988 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3989 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3990
3991 Rep = Builder.CreateIntrinsic(
3992 IID,
3993 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3994 Rep =
3995 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3996 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3997 Rep =
3998 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3999 {CI->getArgOperand(0), Builder.getInt1(false)});
4000 Rep =
4001 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
4002 } else if (Name.starts_with("avx512.mask.psll")) {
4003 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4004 bool IsVariable = Name[16] == 'v';
4005 char Size = Name[16] == '.' ? Name[17]
4006 : Name[17] == '.' ? Name[18]
4007 : Name[18] == '.' ? Name[19]
4008 : Name[20];
4009
4010 Intrinsic::ID IID;
4011 if (IsVariable && Name[17] != '.') {
4012 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
4013 IID = Intrinsic::x86_avx2_psllv_q;
4014 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
4015 IID = Intrinsic::x86_avx2_psllv_q_256;
4016 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
4017 IID = Intrinsic::x86_avx2_psllv_d;
4018 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
4019 IID = Intrinsic::x86_avx2_psllv_d_256;
4020 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
4021 IID = Intrinsic::x86_avx512_psllv_w_128;
4022 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
4023 IID = Intrinsic::x86_avx512_psllv_w_256;
4024 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
4025 IID = Intrinsic::x86_avx512_psllv_w_512;
4026 else
4027 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4028 } else if (Name.ends_with(".128")) {
4029 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
4030 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
4031 : Intrinsic::x86_sse2_psll_d;
4032 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
4033 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
4034 : Intrinsic::x86_sse2_psll_q;
4035 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
4036 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
4037 : Intrinsic::x86_sse2_psll_w;
4038 else
4039 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4040 } else if (Name.ends_with(".256")) {
4041 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
4042 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
4043 : Intrinsic::x86_avx2_psll_d;
4044 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
4045 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
4046 : Intrinsic::x86_avx2_psll_q;
4047 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
4048 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
4049 : Intrinsic::x86_avx2_psll_w;
4050 else
4051 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4052 } else {
4053 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
4054 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
4055 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
4056 : Intrinsic::x86_avx512_psll_d_512;
4057 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
4058 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
4059 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
4060 : Intrinsic::x86_avx512_psll_q_512;
4061 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
4062 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
4063 : Intrinsic::x86_avx512_psll_w_512;
4064 else
4065 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4066 }
4067
4068 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4069 } else if (Name.starts_with("avx512.mask.psrl")) {
4070 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4071 bool IsVariable = Name[16] == 'v';
4072 char Size = Name[16] == '.' ? Name[17]
4073 : Name[17] == '.' ? Name[18]
4074 : Name[18] == '.' ? Name[19]
4075 : Name[20];
4076
4077 Intrinsic::ID IID;
4078 if (IsVariable && Name[17] != '.') {
4079 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
4080 IID = Intrinsic::x86_avx2_psrlv_q;
4081 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
4082 IID = Intrinsic::x86_avx2_psrlv_q_256;
4083 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
4084 IID = Intrinsic::x86_avx2_psrlv_d;
4085 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
4086 IID = Intrinsic::x86_avx2_psrlv_d_256;
4087 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
4088 IID = Intrinsic::x86_avx512_psrlv_w_128;
4089 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
4090 IID = Intrinsic::x86_avx512_psrlv_w_256;
4091 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
4092 IID = Intrinsic::x86_avx512_psrlv_w_512;
4093 else
4094 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4095 } else if (Name.ends_with(".128")) {
4096 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
4097 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4098 : Intrinsic::x86_sse2_psrl_d;
4099 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
4100 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4101 : Intrinsic::x86_sse2_psrl_q;
4102 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
4103 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4104 : Intrinsic::x86_sse2_psrl_w;
4105 else
4106 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4107 } else if (Name.ends_with(".256")) {
4108 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4109 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4110 : Intrinsic::x86_avx2_psrl_d;
4111 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4112 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4113 : Intrinsic::x86_avx2_psrl_q;
4114 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4115 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4116 : Intrinsic::x86_avx2_psrl_w;
4117 else
4118 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4119 } else {
4120 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4121 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4122 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4123 : Intrinsic::x86_avx512_psrl_d_512;
4124 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4125 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4126 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4127 : Intrinsic::x86_avx512_psrl_q_512;
4128 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
4129 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4130 : Intrinsic::x86_avx512_psrl_w_512;
4131 else
4132 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4133 }
4134
4135 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4136 } else if (Name.starts_with("avx512.mask.psra")) {
4137 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4138 bool IsVariable = Name[16] == 'v';
4139 char Size = Name[16] == '.' ? Name[17]
4140 : Name[17] == '.' ? Name[18]
4141 : Name[18] == '.' ? Name[19]
4142 : Name[20];
4143
4144 Intrinsic::ID IID;
4145 if (IsVariable && Name[17] != '.') {
4146 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4147 IID = Intrinsic::x86_avx2_psrav_d;
4148 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4149 IID = Intrinsic::x86_avx2_psrav_d_256;
4150 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4151 IID = Intrinsic::x86_avx512_psrav_w_128;
4152 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4153 IID = Intrinsic::x86_avx512_psrav_w_256;
4154 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4155 IID = Intrinsic::x86_avx512_psrav_w_512;
4156 else
4157 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4158 } else if (Name.ends_with(".128")) {
4159 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4160 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4161 : Intrinsic::x86_sse2_psra_d;
4162 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4163 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4164 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4165 : Intrinsic::x86_avx512_psra_q_128;
4166 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4167 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4168 : Intrinsic::x86_sse2_psra_w;
4169 else
4170 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4171 } else if (Name.ends_with(".256")) {
4172 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4173 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4174 : Intrinsic::x86_avx2_psra_d;
4175 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4176 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4177 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4178 : Intrinsic::x86_avx512_psra_q_256;
4179 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4180 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4181 : Intrinsic::x86_avx2_psra_w;
4182 else
4183 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4184 } else {
4185 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4186 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4187 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4188 : Intrinsic::x86_avx512_psra_d_512;
4189 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4190 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4191 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4192 : Intrinsic::x86_avx512_psra_q_512;
4193 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4194 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4195 : Intrinsic::x86_avx512_psra_w_512;
4196 else
4197 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4198 }
4199
4200 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4201 } else if (Name.starts_with("avx512.mask.move.s")) {
4202 Rep = upgradeMaskedMove(Builder, *CI);
4203 } else if (Name.starts_with("avx512.cvtmask2")) {
4204 Rep = upgradeMaskToInt(Builder, *CI);
4205 } else if (Name.ends_with(".movntdqa")) {
4207 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4208
4209 LoadInst *LI = Builder.CreateAlignedLoad(
4210 CI->getType(), CI->getArgOperand(0),
4212 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4213 Rep = LI;
4214 } else if (Name.starts_with("fma.vfmadd.") ||
4215 Name.starts_with("fma.vfmsub.") ||
4216 Name.starts_with("fma.vfnmadd.") ||
4217 Name.starts_with("fma.vfnmsub.")) {
4218 bool NegMul = Name[6] == 'n';
4219 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4220 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4221
4222 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4223 CI->getArgOperand(2)};
4224
4225 if (IsScalar) {
4226 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4227 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4228 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4229 }
4230
4231 if (NegMul && !IsScalar)
4232 Ops[0] = Builder.CreateFNeg(Ops[0]);
4233 if (NegMul && IsScalar)
4234 Ops[1] = Builder.CreateFNeg(Ops[1]);
4235 if (NegAcc)
4236 Ops[2] = Builder.CreateFNeg(Ops[2]);
4237
4238 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4239
4240 if (IsScalar)
4241 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4242 } else if (Name.starts_with("fma4.vfmadd.s")) {
4243 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4244 CI->getArgOperand(2)};
4245
4246 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4247 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4248 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4249
4250 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4251
4252 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4253 Rep, (uint64_t)0);
4254 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4255 Name.starts_with("avx512.maskz.vfmadd.s") ||
4256 Name.starts_with("avx512.mask3.vfmadd.s") ||
4257 Name.starts_with("avx512.mask3.vfmsub.s") ||
4258 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4259 bool IsMask3 = Name[11] == '3';
4260 bool IsMaskZ = Name[11] == 'z';
4261 // Drop the "avx512.mask." to make it easier.
4262 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4263 bool NegMul = Name[2] == 'n';
4264 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4265
4266 Value *A = CI->getArgOperand(0);
4267 Value *B = CI->getArgOperand(1);
4268 Value *C = CI->getArgOperand(2);
4269
4270 if (NegMul && (IsMask3 || IsMaskZ))
4271 A = Builder.CreateFNeg(A);
4272 if (NegMul && !(IsMask3 || IsMaskZ))
4273 B = Builder.CreateFNeg(B);
4274 if (NegAcc)
4275 C = Builder.CreateFNeg(C);
4276
4277 A = Builder.CreateExtractElement(A, (uint64_t)0);
4278 B = Builder.CreateExtractElement(B, (uint64_t)0);
4279 C = Builder.CreateExtractElement(C, (uint64_t)0);
4280
4281 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4282 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4283 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4284
4285 Intrinsic::ID IID;
4286 if (Name.back() == 'd')
4287 IID = Intrinsic::x86_avx512_vfmadd_f64;
4288 else
4289 IID = Intrinsic::x86_avx512_vfmadd_f32;
4290 Rep = Builder.CreateIntrinsic(IID, Ops);
4291 } else {
4292 Rep = Builder.CreateFMA(A, B, C);
4293 }
4294
4295 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4296 : IsMask3 ? C
4297 : A;
4298
4299 // For Mask3 with NegAcc, we need to create a new extractelement that
4300 // avoids the negation above.
4301 if (NegAcc && IsMask3)
4302 PassThru =
4303 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4304
4305 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4306 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4307 (uint64_t)0);
4308 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4309 Name.starts_with("avx512.mask.vfnmadd.p") ||
4310 Name.starts_with("avx512.mask.vfnmsub.p") ||
4311 Name.starts_with("avx512.mask3.vfmadd.p") ||
4312 Name.starts_with("avx512.mask3.vfmsub.p") ||
4313 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4314 Name.starts_with("avx512.maskz.vfmadd.p")) {
4315 bool IsMask3 = Name[11] == '3';
4316 bool IsMaskZ = Name[11] == 'z';
4317 // Drop the "avx512.mask." to make it easier.
4318 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4319 bool NegMul = Name[2] == 'n';
4320 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4321
4322 Value *A = CI->getArgOperand(0);
4323 Value *B = CI->getArgOperand(1);
4324 Value *C = CI->getArgOperand(2);
4325
4326 if (NegMul && (IsMask3 || IsMaskZ))
4327 A = Builder.CreateFNeg(A);
4328 if (NegMul && !(IsMask3 || IsMaskZ))
4329 B = Builder.CreateFNeg(B);
4330 if (NegAcc)
4331 C = Builder.CreateFNeg(C);
4332
4333 if (CI->arg_size() == 5 &&
4334 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4335 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4336 Intrinsic::ID IID;
4337 // Check the character before ".512" in string.
4338 if (Name[Name.size() - 5] == 's')
4339 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4340 else
4341 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4342
4343 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4344 } else {
4345 Rep = Builder.CreateFMA(A, B, C);
4346 }
4347
4348 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4349 : IsMask3 ? CI->getArgOperand(2)
4350 : CI->getArgOperand(0);
4351
4352 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4353 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4354 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4355 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4356 Intrinsic::ID IID;
4357 if (VecWidth == 128 && EltWidth == 32)
4358 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4359 else if (VecWidth == 256 && EltWidth == 32)
4360 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4361 else if (VecWidth == 128 && EltWidth == 64)
4362 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4363 else if (VecWidth == 256 && EltWidth == 64)
4364 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4365 else
4366 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4367
4368 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4369 CI->getArgOperand(2)};
4370 Ops[2] = Builder.CreateFNeg(Ops[2]);
4371 Rep = Builder.CreateIntrinsic(IID, Ops);
4372 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4373 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4374 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4375 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4376 bool IsMask3 = Name[11] == '3';
4377 bool IsMaskZ = Name[11] == 'z';
4378 // Drop the "avx512.mask." to make it easier.
4379 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4380 bool IsSubAdd = Name[3] == 's';
4381 if (CI->arg_size() == 5) {
4382 Intrinsic::ID IID;
4383 // Check the character before ".512" in string.
4384 if (Name[Name.size() - 5] == 's')
4385 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4386 else
4387 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4388
4389 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4390 CI->getArgOperand(2), CI->getArgOperand(4)};
4391 if (IsSubAdd)
4392 Ops[2] = Builder.CreateFNeg(Ops[2]);
4393
4394 Rep = Builder.CreateIntrinsic(IID, Ops);
4395 } else {
4396 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4397
4398 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4399 CI->getArgOperand(2)};
4400
4402 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4403 Value *Odd = Builder.CreateCall(FMA, Ops);
4404 Ops[2] = Builder.CreateFNeg(Ops[2]);
4405 Value *Even = Builder.CreateCall(FMA, Ops);
4406
4407 if (IsSubAdd)
4408 std::swap(Even, Odd);
4409
4410 SmallVector<int, 32> Idxs(NumElts);
4411 for (int i = 0; i != NumElts; ++i)
4412 Idxs[i] = i + (i % 2) * NumElts;
4413
4414 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4415 }
4416
4417 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4418 : IsMask3 ? CI->getArgOperand(2)
4419 : CI->getArgOperand(0);
4420
4421 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4422 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4423 Name.starts_with("avx512.maskz.pternlog.")) {
4424 bool ZeroMask = Name[11] == 'z';
4425 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4426 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4427 Intrinsic::ID IID;
4428 if (VecWidth == 128 && EltWidth == 32)
4429 IID = Intrinsic::x86_avx512_pternlog_d_128;
4430 else if (VecWidth == 256 && EltWidth == 32)
4431 IID = Intrinsic::x86_avx512_pternlog_d_256;
4432 else if (VecWidth == 512 && EltWidth == 32)
4433 IID = Intrinsic::x86_avx512_pternlog_d_512;
4434 else if (VecWidth == 128 && EltWidth == 64)
4435 IID = Intrinsic::x86_avx512_pternlog_q_128;
4436 else if (VecWidth == 256 && EltWidth == 64)
4437 IID = Intrinsic::x86_avx512_pternlog_q_256;
4438 else if (VecWidth == 512 && EltWidth == 64)
4439 IID = Intrinsic::x86_avx512_pternlog_q_512;
4440 else
4441 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4442
4443 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4444 CI->getArgOperand(2), CI->getArgOperand(3)};
4445 Rep = Builder.CreateIntrinsic(IID, Args);
4446 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4447 : CI->getArgOperand(0);
4448 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4449 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4450 Name.starts_with("avx512.maskz.vpmadd52")) {
4451 bool ZeroMask = Name[11] == 'z';
4452 bool High = Name[20] == 'h' || Name[21] == 'h';
4453 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4454 Intrinsic::ID IID;
4455 if (VecWidth == 128 && !High)
4456 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4457 else if (VecWidth == 256 && !High)
4458 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4459 else if (VecWidth == 512 && !High)
4460 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4461 else if (VecWidth == 128 && High)
4462 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4463 else if (VecWidth == 256 && High)
4464 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4465 else if (VecWidth == 512 && High)
4466 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4467 else
4468 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4469
4470 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4471 CI->getArgOperand(2)};
4472 Rep = Builder.CreateIntrinsic(IID, Args);
4473 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4474 : CI->getArgOperand(0);
4475 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4476 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4477 Name.starts_with("avx512.mask.vpermt2var.") ||
4478 Name.starts_with("avx512.maskz.vpermt2var.")) {
4479 bool ZeroMask = Name[11] == 'z';
4480 bool IndexForm = Name[17] == 'i';
4481 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4482 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4483 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4484 Name.starts_with("avx512.mask.vpdpbusds.") ||
4485 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4486 bool ZeroMask = Name[11] == 'z';
4487 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4488 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4489 Intrinsic::ID IID;
4490 if (VecWidth == 128 && !IsSaturating)
4491 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4492 else if (VecWidth == 256 && !IsSaturating)
4493 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4494 else if (VecWidth == 512 && !IsSaturating)
4495 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4496 else if (VecWidth == 128 && IsSaturating)
4497 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4498 else if (VecWidth == 256 && IsSaturating)
4499 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4500 else if (VecWidth == 512 && IsSaturating)
4501 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4502 else
4503 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4504
4505 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4506 CI->getArgOperand(2)};
4507
4508 // Input arguments types were incorrectly set to vectors of i32 before but
4509 // they should be vectors of i8. Insert bit cast when encountering the old
4510 // types
4511 if (Args[1]->getType()->isVectorTy() &&
4512 cast<VectorType>(Args[1]->getType())
4513 ->getElementType()
4514 ->isIntegerTy(32) &&
4515 Args[2]->getType()->isVectorTy() &&
4516 cast<VectorType>(Args[2]->getType())
4517 ->getElementType()
4518 ->isIntegerTy(32)) {
4519 Type *NewArgType = nullptr;
4520 if (VecWidth == 128)
4521 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4522 else if (VecWidth == 256)
4523 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4524 else if (VecWidth == 512)
4525 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4526 else
4527 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4528 CI);
4529
4530 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4531 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4532 }
4533
4534 Rep = Builder.CreateIntrinsic(IID, Args);
4535 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4536 : CI->getArgOperand(0);
4537 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4538 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4539 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4540 Name.starts_with("avx512.mask.vpdpwssds.") ||
4541 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4542 bool ZeroMask = Name[11] == 'z';
4543 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4544 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4545 Intrinsic::ID IID;
4546 if (VecWidth == 128 && !IsSaturating)
4547 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4548 else if (VecWidth == 256 && !IsSaturating)
4549 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4550 else if (VecWidth == 512 && !IsSaturating)
4551 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4552 else if (VecWidth == 128 && IsSaturating)
4553 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4554 else if (VecWidth == 256 && IsSaturating)
4555 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4556 else if (VecWidth == 512 && IsSaturating)
4557 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4558 else
4559 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4560
4561 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4562 CI->getArgOperand(2)};
4563
4564 // Input arguments types were incorrectly set to vectors of i32 before but
4565 // they should be vectors of i16. Insert bit cast when encountering the old
4566 // types
4567 if (Args[1]->getType()->isVectorTy() &&
4568 cast<VectorType>(Args[1]->getType())
4569 ->getElementType()
4570 ->isIntegerTy(32) &&
4571 Args[2]->getType()->isVectorTy() &&
4572 cast<VectorType>(Args[2]->getType())
4573 ->getElementType()
4574 ->isIntegerTy(32)) {
4575 Type *NewArgType = nullptr;
4576 if (VecWidth == 128)
4577 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4578 else if (VecWidth == 256)
4579 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4580 else if (VecWidth == 512)
4581 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4582 else
4583 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4584 CI);
4585
4586 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4587 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4588 }
4589
4590 Rep = Builder.CreateIntrinsic(IID, Args);
4591 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4592 : CI->getArgOperand(0);
4593 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4594 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4595 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4596 Name == "subborrow.u32" || Name == "subborrow.u64") {
4597 Intrinsic::ID IID;
4598 if (Name[0] == 'a' && Name.back() == '2')
4599 IID = Intrinsic::x86_addcarry_32;
4600 else if (Name[0] == 'a' && Name.back() == '4')
4601 IID = Intrinsic::x86_addcarry_64;
4602 else if (Name[0] == 's' && Name.back() == '2')
4603 IID = Intrinsic::x86_subborrow_32;
4604 else if (Name[0] == 's' && Name.back() == '4')
4605 IID = Intrinsic::x86_subborrow_64;
4606 else
4607 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4608
4609 // Make a call with 3 operands.
4610 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4611 CI->getArgOperand(2)};
4612 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4613
4614 // Extract the second result and store it.
4615 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4616 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4617 // Replace the original call result with the first result of the new call.
4618 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4619
4620 CI->replaceAllUsesWith(CF);
4621 Rep = nullptr;
4622 } else if (Name.starts_with("avx512.mask.") &&
4623 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4624 // Rep will be updated by the call in the condition.
4625 } else if (Name.starts_with("bmi.pdep.")) {
4626 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::pdep);
4627 } else if (Name.starts_with("bmi.pext.")) {
4628 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::pext);
4629 } else
4630 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4631
4632 return Rep;
4633}
4634
4636 Function *F, IRBuilder<> &Builder) {
4637 if (Name.starts_with("neon.bfcvt")) {
4638 if (Name.starts_with("neon.bfcvtn2")) {
4639 SmallVector<int, 32> LoMask(4);
4640 std::iota(LoMask.begin(), LoMask.end(), 0);
4641 SmallVector<int, 32> ConcatMask(8);
4642 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4643 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4644 Value *Trunc =
4645 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4646 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4647 } else if (Name.starts_with("neon.bfcvtn")) {
4648 SmallVector<int, 32> ConcatMask(8);
4649 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4650 Type *V4BF16 =
4651 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4652 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4653 dbgs() << "Trunc: " << *Trunc << "\n";
4654 return Builder.CreateShuffleVector(
4655 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4656 } else {
4657 return Builder.CreateFPTrunc(CI->getOperand(0),
4658 Type::getBFloatTy(F->getContext()));
4659 }
4660 } else if (Name.starts_with("sve.fcvt")) {
4661 Intrinsic::ID NewID =
4663 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4664 .Case("sve.fcvtnt.bf16f32",
4665 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4667 if (NewID == Intrinsic::not_intrinsic)
4668 llvm_unreachable("Unhandled Intrinsic!");
4669
4670 SmallVector<Value *, 3> Args(CI->args());
4671
4672 // The original intrinsics incorrectly used a predicate based on the
4673 // smallest element type rather than the largest.
4674 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4675 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4676
4677 if (Args[1]->getType() != BadPredTy)
4678 llvm_unreachable("Unexpected predicate type!");
4679
4680 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4681 BadPredTy, Args[1]);
4682 Args[1] = Builder.CreateIntrinsic(
4683 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4684
4685 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4686 CI->getName());
4687 }
4688
4689 if (Name == "neon.vcvtfp2hf")
4690 return Builder.CreateBitCast(
4691 Builder.CreateFPTrunc(
4692 CI->getOperand(0),
4693 FixedVectorType::get(Type::getHalfTy(F->getContext()), 4)),
4694 FixedVectorType::get(Type::getInt16Ty(F->getContext()), 4));
4695 if (Name == "neon.vcvthf2fp")
4696 return Builder.CreateFPExt(
4697 Builder.CreateBitCast(
4698 CI->getOperand(0),
4699 FixedVectorType::get(Type::getHalfTy(F->getContext()), 4)),
4700 FixedVectorType::get(Type::getFloatTy(F->getContext()), 4));
4701
4702 llvm_unreachable("Unhandled Intrinsic!");
4703}
4704
4706 IRBuilder<> &Builder) {
4707 if (Name == "mve.vctp64.old") {
4708 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4709 // correct type.
4710 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4711 CI->getArgOperand(0),
4712 /*FMFSource=*/nullptr, CI->getName());
4713 Value *C1 = Builder.CreateIntrinsic(
4714 Intrinsic::arm_mve_pred_v2i,
4715 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4716 return Builder.CreateIntrinsic(
4717 Intrinsic::arm_mve_pred_i2v,
4718 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4719 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4720 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4721 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4722 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4723 Name ==
4724 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4725 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4726 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4727 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4728 Name ==
4729 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4730 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4731 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4732 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4733 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4734 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4735 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4736 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4737 std::vector<Type *> Tys;
4738 unsigned ID = CI->getIntrinsicID();
4739 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4740 switch (ID) {
4741 case Intrinsic::arm_mve_mull_int_predicated:
4742 case Intrinsic::arm_mve_vqdmull_predicated:
4743 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4744 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4745 break;
4746 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4747 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4748 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4749 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4750 V2I1Ty};
4751 break;
4752 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4753 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4754 CI->getOperand(1)->getType(), V2I1Ty};
4755 break;
4756 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4757 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4758 CI->getOperand(2)->getType(), V2I1Ty};
4759 break;
4760 case Intrinsic::arm_cde_vcx1q_predicated:
4761 case Intrinsic::arm_cde_vcx1qa_predicated:
4762 case Intrinsic::arm_cde_vcx2q_predicated:
4763 case Intrinsic::arm_cde_vcx2qa_predicated:
4764 case Intrinsic::arm_cde_vcx3q_predicated:
4765 case Intrinsic::arm_cde_vcx3qa_predicated:
4766 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4767 break;
4768 default:
4769 llvm_unreachable("Unhandled Intrinsic!");
4770 }
4771
4772 std::vector<Value *> Ops;
4773 for (Value *Op : CI->args()) {
4774 Type *Ty = Op->getType();
4775 if (Ty->getScalarSizeInBits() == 1) {
4776 Value *C1 = Builder.CreateIntrinsic(
4777 Intrinsic::arm_mve_pred_v2i,
4778 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4779 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4780 }
4781 Ops.push_back(Op);
4782 }
4783
4784 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4785 CI->getName());
4786 }
4787 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4788}
4789
4790// These are expected to have the arguments:
4791// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4792//
4793// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4794//
4796 Function *F, IRBuilder<> &Builder) {
4797 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4798 // for compatibility.
4799 auto UpgradeLegacyWMMAIUIntrinsicCall =
4800 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4801 ArrayRef<Type *> OverloadTys) -> Value * {
4802 // Prepare arguments, append clamp=0 for compatibility
4803 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4804 Args.push_back(Builder.getFalse());
4805
4806 // Insert the declaration for the right overload types
4808 F->getParent(), F->getIntrinsicID(), OverloadTys);
4809
4810 // Copy operand bundles if any
4812 CI->getOperandBundlesAsDefs(Bundles);
4813
4814 // Create the new call and copy calling properties
4815 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4816 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4817 NewCall->setCallingConv(CI->getCallingConv());
4818 NewCall->setAttributes(CI->getAttributes());
4819 NewCall->setDebugLoc(CI->getDebugLoc());
4820 NewCall->copyMetadata(*CI);
4821 return NewCall;
4822 };
4823
4824 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4825 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4826 "intrinsic should have 7 arguments");
4827 Type *T1 = CI->getArgOperand(4)->getType();
4828 Type *T2 = CI->getArgOperand(1)->getType();
4829 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4830 }
4831 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4832 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4833 "intrinsic should have 8 arguments");
4834 Type *T1 = CI->getArgOperand(4)->getType();
4835 Type *T2 = CI->getArgOperand(1)->getType();
4836 Type *T3 = CI->getArgOperand(3)->getType();
4837 Type *T4 = CI->getArgOperand(5)->getType();
4838 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4839 }
4840
4841 switch (F->getIntrinsicID()) {
4842 default:
4843 break;
4844 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4845 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4846 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4847 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4848 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4849 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4850 // Drop src0 and src1 modifiers.
4851 const Value *Op0 = CI->getArgOperand(0);
4852 const Value *Op2 = CI->getArgOperand(2);
4853 assert(Op0->getType()->isIntegerTy() && Op2->getType()->isIntegerTy());
4854 const ConstantInt *ModA = dyn_cast<ConstantInt>(Op0);
4855 const ConstantInt *ModB = dyn_cast<ConstantInt>(Op2);
4856 if (!ModA->isZero() || !ModB->isZero())
4857 reportFatalUsageError(Name + " matrix A and B modifiers shall be zero");
4858
4860 for (int I = 4, E = CI->arg_size(); I < E; ++I)
4861 Args.push_back(CI->getArgOperand(I));
4862
4863 SmallVector<Type *, 3> Overloads{F->getReturnType(), Args[0]->getType()};
4864 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4865 Overloads.push_back(Args[3]->getType());
4867 F->getParent(), F->getIntrinsicID(), Overloads);
4868
4870 CI->getOperandBundlesAsDefs(Bundles);
4871
4872 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4873 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4874 NewCall->setCallingConv(CI->getCallingConv());
4875 NewCall->setAttributes(CI->getAttributes());
4876 NewCall->setDebugLoc(CI->getDebugLoc());
4877 NewCall->copyMetadata(*CI);
4878 NewCall->takeName(CI);
4879 return NewCall;
4880 }
4881 }
4882
4883 AtomicRMWInst::BinOp RMWOp =
4885 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4886 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4887 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4888 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4889 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4890 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4891 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4892 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4893 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4894 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4895 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4896 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4897 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4898
4899 unsigned NumOperands = CI->getNumOperands();
4900 if (NumOperands < 3) // Malformed bitcode.
4901 return nullptr;
4902
4903 Value *Ptr = CI->getArgOperand(0);
4904 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4905 if (!PtrTy) // Malformed.
4906 return nullptr;
4907
4908 Value *Val = CI->getArgOperand(1);
4909 if (Val->getType() != CI->getType()) // Malformed.
4910 return nullptr;
4911
4912 ConstantInt *OrderArg = nullptr;
4913 bool IsVolatile = false;
4914
4915 // These should have 5 arguments (plus the callee). A separate version of the
4916 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4917 if (NumOperands > 3)
4918 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4919
4920 // Ignore scope argument at 3
4921
4922 if (NumOperands > 5) {
4923 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4924 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4925 }
4926
4928 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4929 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4932
4933 LLVMContext &Ctx = F->getContext();
4934
4935 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4936 Type *RetTy = CI->getType();
4937 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4938 if (VT->getElementType()->isIntegerTy(16)) {
4939 VectorType *AsBF16 =
4940 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4941 Val = Builder.CreateBitCast(Val, AsBF16);
4942 }
4943 }
4944
4945 // The scope argument never really worked correctly. Use agent as the most
4946 // conservative option which should still always produce the instruction.
4947 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4948 AtomicRMWInst *RMW =
4949 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4950
4951 unsigned AddrSpace = PtrTy->getAddressSpace();
4952 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4953 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4954 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4955 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4956 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4957 }
4958
4959 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4960 MDBuilder MDB(F->getContext());
4961 MDNode *RangeNotPrivate =
4964 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4965 }
4966
4967 if (IsVolatile)
4968 RMW->setVolatile(true);
4969
4970 return Builder.CreateBitCast(RMW, RetTy);
4971}
4972
4973/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4974/// plain MDNode, as it's the verifier's job to check these are the correct
4975/// types later.
4976static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4977 if (Op < CI->arg_size()) {
4978 if (MetadataAsValue *MAV =
4980 Metadata *MD = MAV->getMetadata();
4981 return dyn_cast_if_present<MDNode>(MD);
4982 }
4983 }
4984 return nullptr;
4985}
4986
4987/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4988static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4989 if (Op < CI->arg_size())
4991 return MAV->getMetadata();
4992 return nullptr;
4993}
4994
4995/// Convert debug intrinsic calls to non-instruction debug records.
4996/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4997/// \p CI - The debug intrinsic call.
4999 DbgRecord *DR = nullptr;
5000 if (Name == "label") {
5002 } else if (Name == "assign") {
5005 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
5006 unwrapMAVMetadataOp(CI, 4),
5007 /*The address is a Value ref, it will be stored as a Metadata */
5008 unwrapMAVOp(CI, 5));
5009 } else if (Name == "declare") {
5012 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr);
5013 } else if (Name == "addr") {
5014 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
5015 MDNode *ExprNode = unwrapMAVOp(CI, 2);
5016 // Don't try to add something to the expression if it's not an expression.
5017 // Instead, allow the verifier to fail later.
5018 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
5019 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
5020 }
5023 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr);
5024 } else if (Name == "value") {
5025 // An old version of dbg.value had an extra offset argument.
5026 unsigned VarOp = 1;
5027 unsigned ExprOp = 2;
5028 if (CI->arg_size() == 4) {
5030 // Nonzero offset dbg.values get dropped without a replacement.
5031 if (!Offset || !Offset->isNullValue())
5032 return;
5033 VarOp = 2;
5034 ExprOp = 3;
5035 }
5038 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
5039 nullptr);
5040 }
5041 DR->setDebugLoc(CI->getDebugLoc());
5042 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
5043 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
5044}
5045
5048 if (!Offset)
5049 reportFatalUsageError("Invalid llvm.vector.splice offset argument");
5050 int64_t OffsetVal = Offset->getSExtValue();
5051 return Builder.CreateIntrinsic(OffsetVal >= 0
5052 ? Intrinsic::vector_splice_left
5053 : Intrinsic::vector_splice_right,
5054 CI->getType(),
5055 {CI->getArgOperand(0), CI->getArgOperand(1),
5056 Builder.getInt32(std::abs(OffsetVal))});
5057}
5058
5060 Function *F, IRBuilder<> &Builder) {
5061 if (Name.starts_with("to.fp16")) {
5062 Value *Cast =
5063 Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
5064 return Builder.CreateBitCast(Cast, CI->getType());
5065 }
5066
5067 if (Name.starts_with("from.fp16")) {
5068 Value *Cast =
5069 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
5070 return Builder.CreateFPExt(Cast, CI->getType());
5071 }
5072
5073 return nullptr;
5074}
5075
5076/// Upgrade a call to an old intrinsic. All argument and return casting must be
5077/// provided to seamlessly integrate with existing context.
5079 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
5080 // checks the callee's function type matches. It's likely we need to handle
5081 // type changes here.
5083 if (!F)
5084 return;
5085
5086 LLVMContext &C = CI->getContext();
5087 IRBuilder<> Builder(C);
5088 if (isa<FPMathOperator>(CI))
5089 Builder.setFastMathFlags(CI->getFastMathFlags());
5090 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
5091
5092 if (!NewFn) {
5093 // Get the Function's name.
5094 StringRef Name = F->getName();
5095 if (!Name.consume_front("llvm."))
5096 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
5097
5098 bool IsX86 = Name.consume_front("x86.");
5099 bool IsNVVM = Name.consume_front("nvvm.");
5100 bool IsAArch64 = Name.consume_front("aarch64.");
5101 bool IsARM = Name.consume_front("arm.");
5102 bool IsAMDGCN = Name.consume_front("amdgcn.");
5103 bool IsDbg = Name.consume_front("dbg.");
5104 bool IsOldSplice =
5105 (Name.consume_front("experimental.vector.splice") ||
5106 Name.consume_front("vector.splice")) &&
5107 !(Name.starts_with(".left") || Name.starts_with(".right"));
5108 Value *Rep = nullptr;
5109
5110 if (!IsX86 && Name == "stackprotectorcheck") {
5111 Rep = nullptr;
5112 } else if (IsNVVM) {
5113 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
5114 } else if (IsX86) {
5115 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
5116 } else if (IsAArch64) {
5117 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
5118 } else if (IsARM) {
5119 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
5120 } else if (IsAMDGCN) {
5121 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
5122 } else if (IsDbg) {
5124 } else if (IsOldSplice) {
5125 Rep = upgradeVectorSplice(CI, Builder);
5126 } else if (Name.consume_front("convert.")) {
5127 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
5128 } else {
5129 llvm_unreachable("Unknown function for CallBase upgrade.");
5130 }
5131
5132 if (Rep)
5133 CI->replaceAllUsesWith(Rep);
5134 CI->eraseFromParent();
5135 return;
5136 }
5137
5138 const auto &DefaultCase = [&]() -> void {
5139 if (F == NewFn)
5140 return;
5141
5142 if (CI->getFunctionType() == NewFn->getFunctionType()) {
5143 // Handle generic mangling change.
5144 assert(
5145 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
5146 "Unknown function for CallBase upgrade and isn't just a name change");
5147 CI->setCalledFunction(NewFn);
5148 return;
5149 }
5150
5151 // This must be an upgrade from a named to a literal struct.
5152 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
5153 assert(OldST != NewFn->getReturnType() &&
5154 "Return type must have changed");
5155 assert(OldST->getNumElements() ==
5156 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
5157 "Must have same number of elements");
5158
5159 SmallVector<Value *> Args(CI->args());
5160 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5161 NewCI->setAttributes(CI->getAttributes());
5162 Value *Res = PoisonValue::get(OldST);
5163 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5164 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5165 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5166 }
5167 CI->replaceAllUsesWith(Res);
5168 CI->eraseFromParent();
5169 return;
5170 }
5171
5172 // We're probably about to produce something invalid. Let the verifier catch
5173 // it instead of dying here.
5174 CI->setCalledOperand(
5176 return;
5177 };
5178 CallInst *NewCall = nullptr;
5179 switch (NewFn->getIntrinsicID()) {
5180 default: {
5181 DefaultCase();
5182 return;
5183 }
5184 case Intrinsic::arm_neon_vst1:
5185 case Intrinsic::arm_neon_vst2:
5186 case Intrinsic::arm_neon_vst3:
5187 case Intrinsic::arm_neon_vst4:
5188 case Intrinsic::arm_neon_vst2lane:
5189 case Intrinsic::arm_neon_vst3lane:
5190 case Intrinsic::arm_neon_vst4lane: {
5191 SmallVector<Value *, 4> Args(CI->args());
5192 NewCall = Builder.CreateCall(NewFn, Args);
5193 break;
5194 }
5195 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5196 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5197 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5198 LLVMContext &Ctx = F->getParent()->getContext();
5199 SmallVector<Value *, 4> Args(CI->args());
5200 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
5201 cast<ConstantInt>(Args[3])->getZExtValue());
5202 NewCall = Builder.CreateCall(NewFn, Args);
5203 break;
5204 }
5205 case Intrinsic::aarch64_sve_ld3_sret:
5206 case Intrinsic::aarch64_sve_ld4_sret:
5207 case Intrinsic::aarch64_sve_ld2_sret: {
5208 // Is this a trivial remangle of the name to support ptr address spaces?
5209 if (isa<StructType>(F->getReturnType())) {
5210 DefaultCase();
5211 return;
5212 }
5213
5214 StringRef Name = F->getName();
5215 Name = Name.substr(5);
5216 unsigned N = StringSwitch<unsigned>(Name)
5217 .StartsWith("aarch64.sve.ld2", 2)
5218 .StartsWith("aarch64.sve.ld3", 3)
5219 .StartsWith("aarch64.sve.ld4", 4)
5220 .Default(0);
5221 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5222 unsigned MinElts = RetTy->getMinNumElements() / N;
5223 SmallVector<Value *, 2> Args(CI->args());
5224 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5225 Value *Ret = llvm::PoisonValue::get(RetTy);
5226 for (unsigned I = 0; I < N; I++) {
5227 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5228 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5229 }
5230 NewCall = dyn_cast<CallInst>(Ret);
5231 break;
5232 }
5233
5234 case Intrinsic::coro_end: {
5235 SmallVector<Value *, 3> Args(CI->args());
5236 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5237 NewCall = Builder.CreateCall(NewFn, Args);
5238 break;
5239 }
5240
5241 case Intrinsic::vector_extract: {
5242 StringRef Name = F->getName();
5243 Name = Name.substr(5); // Strip llvm
5244 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5245 DefaultCase();
5246 return;
5247 }
5248 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5249 unsigned MinElts = RetTy->getMinNumElements();
5250 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5251 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5252 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5253 break;
5254 }
5255
5256 case Intrinsic::vector_insert: {
5257 StringRef Name = F->getName();
5258 Name = Name.substr(5);
5259 if (!Name.starts_with("aarch64.sve.tuple")) {
5260 DefaultCase();
5261 return;
5262 }
5263 if (Name.starts_with("aarch64.sve.tuple.set")) {
5264 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5265 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5266 Value *NewIdx =
5267 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5268 NewCall = Builder.CreateCall(
5269 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5270 break;
5271 }
5272 if (Name.starts_with("aarch64.sve.tuple.create")) {
5273 unsigned N = StringSwitch<unsigned>(Name)
5274 .StartsWith("aarch64.sve.tuple.create2", 2)
5275 .StartsWith("aarch64.sve.tuple.create3", 3)
5276 .StartsWith("aarch64.sve.tuple.create4", 4)
5277 .Default(0);
5278 assert(N > 1 && "Create is expected to be between 2-4");
5279 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5280 Value *Ret = llvm::PoisonValue::get(RetTy);
5281 unsigned MinElts = RetTy->getMinNumElements() / N;
5282 for (unsigned I = 0; I < N; I++) {
5283 Value *V = CI->getArgOperand(I);
5284 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5285 }
5286 NewCall = dyn_cast<CallInst>(Ret);
5287 }
5288 break;
5289 }
5290
5291 case Intrinsic::arm_neon_bfdot:
5292 case Intrinsic::arm_neon_bfmmla:
5293 case Intrinsic::arm_neon_bfmlalb:
5294 case Intrinsic::arm_neon_bfmlalt:
5295 case Intrinsic::aarch64_neon_bfdot:
5296 case Intrinsic::aarch64_neon_bfmmla:
5297 case Intrinsic::aarch64_neon_bfmlalb:
5298 case Intrinsic::aarch64_neon_bfmlalt: {
5300 assert(CI->arg_size() == 3 &&
5301 "Mismatch between function args and call args");
5302 size_t OperandWidth =
5304 assert((OperandWidth == 64 || OperandWidth == 128) &&
5305 "Unexpected operand width");
5306 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5307 auto Iter = CI->args().begin();
5308 Args.push_back(*Iter++);
5309 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5310 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5311 NewCall = Builder.CreateCall(NewFn, Args);
5312 break;
5313 }
5314
5315 case Intrinsic::bitreverse:
5316 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5317 break;
5318
5319 case Intrinsic::ctlz:
5320 case Intrinsic::cttz: {
5321 if (CI->arg_size() != 1) {
5322 DefaultCase();
5323 return;
5324 }
5325
5326 NewCall =
5327 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5328 break;
5329 }
5330
5331 case Intrinsic::objectsize: {
5332 Value *NullIsUnknownSize =
5333 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5334 Value *Dynamic =
5335 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5336 NewCall = Builder.CreateCall(
5337 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5338 break;
5339 }
5340
5341 case Intrinsic::ctpop:
5342 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5343 break;
5344 case Intrinsic::dbg_value: {
5345 StringRef Name = F->getName();
5346 Name = Name.substr(5); // Strip llvm.
5347 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5348 if (Name.starts_with("dbg.addr")) {
5350 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5351 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5352 NewCall =
5353 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5354 MetadataAsValue::get(C, Expr)});
5355 break;
5356 }
5357
5358 // Upgrade from the old version that had an extra offset argument.
5359 assert(CI->arg_size() == 4);
5360 // Drop nonzero offsets instead of attempting to upgrade them.
5362 if (Offset->isNullValue()) {
5363 NewCall = Builder.CreateCall(
5364 NewFn,
5365 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5366 break;
5367 }
5368 CI->eraseFromParent();
5369 return;
5370 }
5371
5372 case Intrinsic::ptr_annotation:
5373 // Upgrade from versions that lacked the annotation attribute argument.
5374 if (CI->arg_size() != 4) {
5375 DefaultCase();
5376 return;
5377 }
5378
5379 // Create a new call with an added null annotation attribute argument.
5380 NewCall = Builder.CreateCall(
5381 NewFn,
5382 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5383 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5384 NewCall->takeName(CI);
5385 CI->replaceAllUsesWith(NewCall);
5386 CI->eraseFromParent();
5387 return;
5388
5389 case Intrinsic::var_annotation:
5390 // Upgrade from versions that lacked the annotation attribute argument.
5391 if (CI->arg_size() != 4) {
5392 DefaultCase();
5393 return;
5394 }
5395 // Create a new call with an added null annotation attribute argument.
5396 NewCall = Builder.CreateCall(
5397 NewFn,
5398 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5399 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5400 NewCall->takeName(CI);
5401 CI->replaceAllUsesWith(NewCall);
5402 CI->eraseFromParent();
5403 return;
5404
5405 case Intrinsic::riscv_aes32dsi:
5406 case Intrinsic::riscv_aes32dsmi:
5407 case Intrinsic::riscv_aes32esi:
5408 case Intrinsic::riscv_aes32esmi:
5409 case Intrinsic::riscv_sm4ks:
5410 case Intrinsic::riscv_sm4ed: {
5411 // The last argument to these intrinsics used to be i8 and changed to i32.
5412 // The type overload for sm4ks and sm4ed was removed.
5413 Value *Arg2 = CI->getArgOperand(2);
5414 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5415 return;
5416
5417 Value *Arg0 = CI->getArgOperand(0);
5418 Value *Arg1 = CI->getArgOperand(1);
5419 if (CI->getType()->isIntegerTy(64)) {
5420 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5421 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5422 }
5423
5424 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5425 cast<ConstantInt>(Arg2)->getZExtValue());
5426
5427 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5428 Value *Res = NewCall;
5429 if (Res->getType() != CI->getType())
5430 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5431 NewCall->takeName(CI);
5432 CI->replaceAllUsesWith(Res);
5433 CI->eraseFromParent();
5434 return;
5435 }
5436 case Intrinsic::nvvm_mapa_shared_cluster: {
5437 // Create a new call with the correct address space.
5438 NewCall =
5439 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5440 Value *Res = NewCall;
5441 Res = Builder.CreateAddrSpaceCast(
5442 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5443 NewCall->takeName(CI);
5444 CI->replaceAllUsesWith(Res);
5445 CI->eraseFromParent();
5446 return;
5447 }
5448 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5449 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5450 // Create a new call with the correct address space.
5451 SmallVector<Value *, 4> Args(CI->args());
5452 Args[0] = Builder.CreateAddrSpaceCast(
5453 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5454
5455 NewCall = Builder.CreateCall(NewFn, Args);
5456 NewCall->takeName(CI);
5457 CI->replaceAllUsesWith(NewCall);
5458 CI->eraseFromParent();
5459 return;
5460 }
5461 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5462 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5463 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5464 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5465 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5466 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5467 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5468 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5469 SmallVector<Value *, 16> Args(CI->args());
5470
5471 // Create AddrSpaceCast to shared_cluster if needed.
5472 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5473 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5475 Args[0] = Builder.CreateAddrSpaceCast(
5476 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5477
5478 // Attach the flag argument for cta_group, with a
5479 // default value of 0. This handles case (2) in
5480 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5481 size_t NumArgs = CI->arg_size();
5482 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5483 if (!FlagArg->getType()->isIntegerTy(1))
5484 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5485
5486 NewCall = Builder.CreateCall(NewFn, Args);
5487 NewCall->takeName(CI);
5488 CI->replaceAllUsesWith(NewCall);
5489 CI->eraseFromParent();
5490 return;
5491 }
5492 case Intrinsic::riscv_sha256sig0:
5493 case Intrinsic::riscv_sha256sig1:
5494 case Intrinsic::riscv_sha256sum0:
5495 case Intrinsic::riscv_sha256sum1:
5496 case Intrinsic::riscv_sm3p0:
5497 case Intrinsic::riscv_sm3p1: {
5498 // The last argument to these intrinsics used to be i8 and changed to i32.
5499 // The type overload for sm4ks and sm4ed was removed.
5500 if (!CI->getType()->isIntegerTy(64))
5501 return;
5502
5503 Value *Arg =
5504 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5505
5506 NewCall = Builder.CreateCall(NewFn, Arg);
5507 Value *Res =
5508 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5509 NewCall->takeName(CI);
5510 CI->replaceAllUsesWith(Res);
5511 CI->eraseFromParent();
5512 return;
5513 }
5514
5515 case Intrinsic::x86_xop_vfrcz_ss:
5516 case Intrinsic::x86_xop_vfrcz_sd:
5517 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5518 break;
5519
5520 case Intrinsic::x86_xop_vpermil2pd:
5521 case Intrinsic::x86_xop_vpermil2ps:
5522 case Intrinsic::x86_xop_vpermil2pd_256:
5523 case Intrinsic::x86_xop_vpermil2ps_256: {
5524 SmallVector<Value *, 4> Args(CI->args());
5525 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5526 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5527 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5528 NewCall = Builder.CreateCall(NewFn, Args);
5529 break;
5530 }
5531
5532 case Intrinsic::x86_sse41_ptestc:
5533 case Intrinsic::x86_sse41_ptestz:
5534 case Intrinsic::x86_sse41_ptestnzc: {
5535 // The arguments for these intrinsics used to be v4f32, and changed
5536 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5537 // So, the only thing required is a bitcast for both arguments.
5538 // First, check the arguments have the old type.
5539 Value *Arg0 = CI->getArgOperand(0);
5540 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5541 return;
5542
5543 // Old intrinsic, add bitcasts
5544 Value *Arg1 = CI->getArgOperand(1);
5545
5546 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5547
5548 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5549 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5550
5551 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5552 break;
5553 }
5554
5555 case Intrinsic::x86_rdtscp: {
5556 // This used to take 1 arguments. If we have no arguments, it is already
5557 // upgraded.
5558 if (CI->getNumOperands() == 0)
5559 return;
5560
5561 NewCall = Builder.CreateCall(NewFn);
5562 // Extract the second result and store it.
5563 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5564 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5565 // Replace the original call result with the first result of the new call.
5566 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5567
5568 NewCall->takeName(CI);
5569 CI->replaceAllUsesWith(TSC);
5570 CI->eraseFromParent();
5571 return;
5572 }
5573
5574 case Intrinsic::x86_sse41_insertps:
5575 case Intrinsic::x86_sse41_dppd:
5576 case Intrinsic::x86_sse41_dpps:
5577 case Intrinsic::x86_sse41_mpsadbw:
5578 case Intrinsic::x86_avx_dp_ps_256:
5579 case Intrinsic::x86_avx2_mpsadbw: {
5580 // Need to truncate the last argument from i32 to i8 -- this argument models
5581 // an inherently 8-bit immediate operand to these x86 instructions.
5582 SmallVector<Value *, 4> Args(CI->args());
5583
5584 // Replace the last argument with a trunc.
5585 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5586 NewCall = Builder.CreateCall(NewFn, Args);
5587 break;
5588 }
5589
5590 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5591 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5592 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5593 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5594 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5595 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5596 SmallVector<Value *, 4> Args(CI->args());
5597 unsigned NumElts =
5598 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5599 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5600
5601 NewCall = Builder.CreateCall(NewFn, Args);
5602 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5603
5604 NewCall->takeName(CI);
5605 CI->replaceAllUsesWith(Res);
5606 CI->eraseFromParent();
5607 return;
5608 }
5609
5610 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5611 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5612 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5613 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5614 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5615 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5616 SmallVector<Value *, 4> Args(CI->args());
5617 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5618 if (NewFn->getIntrinsicID() ==
5619 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5620 Args[1] = Builder.CreateBitCast(
5621 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5622
5623 NewCall = Builder.CreateCall(NewFn, Args);
5624 Value *Res = Builder.CreateBitCast(
5625 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5626
5627 NewCall->takeName(CI);
5628 CI->replaceAllUsesWith(Res);
5629 CI->eraseFromParent();
5630 return;
5631 }
5632 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5633 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5634 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5635 SmallVector<Value *, 4> Args(CI->args());
5636 unsigned NumElts =
5637 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5638 Args[1] = Builder.CreateBitCast(
5639 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5640 Args[2] = Builder.CreateBitCast(
5641 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5642
5643 NewCall = Builder.CreateCall(NewFn, Args);
5644 break;
5645 }
5646
5647 case Intrinsic::thread_pointer: {
5648 NewCall = Builder.CreateCall(NewFn, {});
5649 break;
5650 }
5651
5652 case Intrinsic::memcpy:
5653 case Intrinsic::memmove:
5654 case Intrinsic::memset: {
5655 // We have to make sure that the call signature is what we're expecting.
5656 // We only want to change the old signatures by removing the alignment arg:
5657 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5658 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5659 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5660 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5661 // Note: i8*'s in the above can be any pointer type
5662 if (CI->arg_size() != 5) {
5663 DefaultCase();
5664 return;
5665 }
5666 // Remove alignment argument (3), and add alignment attributes to the
5667 // dest/src pointers.
5668 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5669 CI->getArgOperand(2), CI->getArgOperand(4)};
5670 NewCall = Builder.CreateCall(NewFn, Args);
5671 AttributeList OldAttrs = CI->getAttributes();
5672 AttributeList NewAttrs = AttributeList::get(
5673 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5674 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5675 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5676 NewCall->setAttributes(NewAttrs);
5677 auto *MemCI = cast<MemIntrinsic>(NewCall);
5678 // All mem intrinsics support dest alignment.
5680 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5681 // Memcpy/Memmove also support source alignment.
5682 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5683 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5684 break;
5685 }
5686
5687 case Intrinsic::masked_load:
5688 case Intrinsic::masked_gather:
5689 case Intrinsic::masked_store:
5690 case Intrinsic::masked_scatter: {
5691 if (CI->arg_size() != 4) {
5692 DefaultCase();
5693 return;
5694 }
5695
5696 auto GetMaybeAlign = [](Value *Op) {
5697 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5698 uint64_t Val = CI->getZExtValue();
5699 if (Val == 0)
5700 return MaybeAlign();
5701 if (isPowerOf2_64(Val))
5702 return MaybeAlign(Val);
5703 }
5704 reportFatalUsageError("Invalid alignment argument");
5705 };
5706 auto GetAlign = [&](Value *Op) {
5707 MaybeAlign Align = GetMaybeAlign(Op);
5708 if (Align)
5709 return *Align;
5710 reportFatalUsageError("Invalid zero alignment argument");
5711 };
5712
5713 const DataLayout &DL = CI->getDataLayout();
5714 switch (NewFn->getIntrinsicID()) {
5715 case Intrinsic::masked_load:
5716 NewCall = Builder.CreateMaskedLoad(
5717 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5718 CI->getArgOperand(2), CI->getArgOperand(3));
5719 break;
5720 case Intrinsic::masked_gather:
5721 NewCall = Builder.CreateMaskedGather(
5722 CI->getType(), CI->getArgOperand(0),
5723 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5724 CI->getType()->getScalarType()),
5725 CI->getArgOperand(2), CI->getArgOperand(3));
5726 break;
5727 case Intrinsic::masked_store:
5728 NewCall = Builder.CreateMaskedStore(
5729 CI->getArgOperand(0), CI->getArgOperand(1),
5730 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5731 break;
5732 case Intrinsic::masked_scatter:
5733 NewCall = Builder.CreateMaskedScatter(
5734 CI->getArgOperand(0), CI->getArgOperand(1),
5735 DL.getValueOrABITypeAlignment(
5736 GetMaybeAlign(CI->getArgOperand(2)),
5737 CI->getArgOperand(0)->getType()->getScalarType()),
5738 CI->getArgOperand(3));
5739 break;
5740 default:
5741 llvm_unreachable("Unexpected intrinsic ID");
5742 }
5743 // Previous metadata is still valid.
5744 NewCall->copyMetadata(*CI);
5745 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5746 break;
5747 }
5748
5749 case Intrinsic::lifetime_start:
5750 case Intrinsic::lifetime_end: {
5751 if (CI->arg_size() != 2) {
5752 DefaultCase();
5753 return;
5754 }
5755
5756 Value *Ptr = CI->getArgOperand(1);
5757 // Try to strip pointer casts, such that the lifetime works on an alloca.
5758 Ptr = Ptr->stripPointerCasts();
5759 if (isa<AllocaInst>(Ptr)) {
5760 // Don't use NewFn, as we might have looked through an addrspacecast.
5761 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5762 NewCall = Builder.CreateLifetimeStart(Ptr);
5763 else
5764 NewCall = Builder.CreateLifetimeEnd(Ptr);
5765 break;
5766 }
5767
5768 // Otherwise remove the lifetime marker.
5769 CI->eraseFromParent();
5770 return;
5771 }
5772
5773 case Intrinsic::x86_avx512_vpdpbusd_128:
5774 case Intrinsic::x86_avx512_vpdpbusd_256:
5775 case Intrinsic::x86_avx512_vpdpbusd_512:
5776 case Intrinsic::x86_avx512_vpdpbusds_128:
5777 case Intrinsic::x86_avx512_vpdpbusds_256:
5778 case Intrinsic::x86_avx512_vpdpbusds_512:
5779 case Intrinsic::x86_avx2_vpdpbssd_128:
5780 case Intrinsic::x86_avx2_vpdpbssd_256:
5781 case Intrinsic::x86_avx10_vpdpbssd_512:
5782 case Intrinsic::x86_avx2_vpdpbssds_128:
5783 case Intrinsic::x86_avx2_vpdpbssds_256:
5784 case Intrinsic::x86_avx10_vpdpbssds_512:
5785 case Intrinsic::x86_avx2_vpdpbsud_128:
5786 case Intrinsic::x86_avx2_vpdpbsud_256:
5787 case Intrinsic::x86_avx10_vpdpbsud_512:
5788 case Intrinsic::x86_avx2_vpdpbsuds_128:
5789 case Intrinsic::x86_avx2_vpdpbsuds_256:
5790 case Intrinsic::x86_avx10_vpdpbsuds_512:
5791 case Intrinsic::x86_avx2_vpdpbuud_128:
5792 case Intrinsic::x86_avx2_vpdpbuud_256:
5793 case Intrinsic::x86_avx10_vpdpbuud_512:
5794 case Intrinsic::x86_avx2_vpdpbuuds_128:
5795 case Intrinsic::x86_avx2_vpdpbuuds_256:
5796 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5797 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5798 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5799 CI->getArgOperand(2)};
5800 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5801 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5802 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5803
5804 NewCall = Builder.CreateCall(NewFn, Args);
5805 break;
5806 }
5807 case Intrinsic::x86_avx512_vpdpwssd_128:
5808 case Intrinsic::x86_avx512_vpdpwssd_256:
5809 case Intrinsic::x86_avx512_vpdpwssd_512:
5810 case Intrinsic::x86_avx512_vpdpwssds_128:
5811 case Intrinsic::x86_avx512_vpdpwssds_256:
5812 case Intrinsic::x86_avx512_vpdpwssds_512:
5813 case Intrinsic::x86_avx2_vpdpwsud_128:
5814 case Intrinsic::x86_avx2_vpdpwsud_256:
5815 case Intrinsic::x86_avx10_vpdpwsud_512:
5816 case Intrinsic::x86_avx2_vpdpwsuds_128:
5817 case Intrinsic::x86_avx2_vpdpwsuds_256:
5818 case Intrinsic::x86_avx10_vpdpwsuds_512:
5819 case Intrinsic::x86_avx2_vpdpwusd_128:
5820 case Intrinsic::x86_avx2_vpdpwusd_256:
5821 case Intrinsic::x86_avx10_vpdpwusd_512:
5822 case Intrinsic::x86_avx2_vpdpwusds_128:
5823 case Intrinsic::x86_avx2_vpdpwusds_256:
5824 case Intrinsic::x86_avx10_vpdpwusds_512:
5825 case Intrinsic::x86_avx2_vpdpwuud_128:
5826 case Intrinsic::x86_avx2_vpdpwuud_256:
5827 case Intrinsic::x86_avx10_vpdpwuud_512:
5828 case Intrinsic::x86_avx2_vpdpwuuds_128:
5829 case Intrinsic::x86_avx2_vpdpwuuds_256:
5830 case Intrinsic::x86_avx10_vpdpwuuds_512:
5831 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5832 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5833 CI->getArgOperand(2)};
5834 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5835 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5836 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5837
5838 NewCall = Builder.CreateCall(NewFn, Args);
5839 break;
5840 }
5841 assert(NewCall && "Should have either set this variable or returned through "
5842 "the default case");
5843 NewCall->takeName(CI);
5844 CI->replaceAllUsesWith(NewCall);
5845 CI->eraseFromParent();
5846}
5847
// NOTE(review): the signature line of this definition is not visible in this
// view. The body works on a Function pointer F: when UpgradeIntrinsicFunction
// reports that F needs upgrading, every CallBase user of F is handed to
// UpgradeIntrinsicCall, and F is erased unless it is its own replacement.
5849 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5850
5851 // Check if this function should be upgraded and get the replacement function
5852 // if there is one.
5853 Function *NewFn;
5854 if (UpgradeIntrinsicFunction(F, NewFn)) {
5855 // Replace all users of the old function with the new function or new
5856 // instructions. make_early_inc_range is needed because the call is deleted.
5857 for (User *U : make_early_inc_range(F->users()))
5858 if (CallBase *CB = dyn_cast<CallBase>(U))
5859 UpgradeIntrinsicCall(CB, NewFn);
5860
5861 // Remove old function, no longer used, from the module.
// When F == NewFn the declaration is kept.
5862 if (F != NewFn)
5863 F->eraseFromParent();
5864 }
5865}
5866
// NOTE(review): the signature line of this definition is not visible in this
// view. The body reads an MDNode reference MD and returns an MDNode pointer:
// either &MD unchanged, or a freshly built node in the struct-path aware TBAA
// layout described by the comments below.
5868 const unsigned NumOperands = MD.getNumOperands();
5869 if (NumOperands == 0)
5870 return &MD; // Invalid, punt to a verifier error.
5871
5872 // Check if the tag uses struct-path aware TBAA format.
5873 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5874 return &MD;
5875
5876 auto &Context = MD.getContext();
5877 if (NumOperands == 3) {
// Three-operand form: the first two operands become the scalar type node and
// the third operand is carried over as the last element of the new tag.
5878 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5879 MDNode *ScalarType = MDNode::get(Context, Elts);
5880 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5881 Metadata *Elts2[] = {ScalarType, ScalarType,
// NOTE(review): the line(s) supplying the "offset 0" element are not visible
// in this view.
5884 MD.getOperand(2)};
5885 return MDNode::get(Context, Elts2);
5886 }
5887 // Create a MDNode <MD, MD, offset 0>
// NOTE(review): the line that opens the Elts initializer is not visible in
// this view; only its tail (an i64-typed value) is shown.
5889 Type::getInt64Ty(Context)))};
5890 return MDNode::get(Context, Elts);
5891}
5892
// NOTE(review): the first line of this signature is not visible in this view;
// besides the Temp out-parameter the body uses Opc, V and DestTy.
// Replaces a pointer bitcast that changes address space with a
// ptrtoint/inttoptr pair. Returns the inttoptr instruction and stores the
// intermediate ptrtoint in Temp; returns nullptr when no rewrite is needed.
5894 Instruction *&Temp) {
// Only BitCast opcodes are considered. Note that Temp is left untouched on
// this early return.
5895 if (Opc != Instruction::BitCast)
5896 return nullptr;
5897
5898 Temp = nullptr;
5899 Type *SrcTy = V->getType();
5900 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5901 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5902 LLVMContext &Context = V->getContext();
5903
5904 // We have no information about target data layout, so we assume that
5905 // the maximum pointer size is 64bit.
5906 Type *MidTy = Type::getInt64Ty(Context);
5907 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5908
5909 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5910 }
5911
5912 return nullptr;
5913}
5914
// NOTE(review): the signature line of this definition is not visible in this
// view; the body uses Opc, a constant C and DestTy.
// Constant-expression counterpart of the instruction rewrite: handles a
// pointer bitcast whose source and destination address spaces differ, and
// returns nullptr in every other case.
5916 if (Opc != Instruction::BitCast)
5917 return nullptr;
5918
5919 Type *SrcTy = C->getType();
5920 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5921 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5922 LLVMContext &Context = C->getContext();
5923
5924 // We have no information about target data layout, so we assume that
5925 // the maximum pointer size is 64bit.
5926 Type *MidTy = Type::getInt64Ty(Context);
5927
// NOTE(review): the line that begins the returned expression is not visible
// in this view; only its final argument (DestTy) is shown.
5929 DestTy);
5930 }
5931
5932 return nullptr;
5933}
5934
5935/// Check the debug info version number, if it is out-dated, drop the debug
5936/// info. Return true if module is modified.
// NOTE(review): the signature line and the condition guarding the first
// early return are not visible in this view.
5939 return false;
5940
5941 llvm::TimeTraceScope timeScope("Upgrade debug info");
5942 // We need to get metadata before the module is verified (i.e., getModuleFlag
5943 // makes assumptions that we haven't verified yet). Carefully extract the flag
5944 // from the metadata.
5945 unsigned Version = 0;
5946 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
// Find the first module flag with at least three operands whose key (operand
// 1) is the string "Debug Info Version".
5947 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5948 if (Flag->getNumOperands() < 3)
5949 return false;
5950 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5951 return K->getString() == "Debug Info Version";
5952 return false;
5953 });
5954 if (OpIt != ModFlags->op_end()) {
// Version stays 0 when the flag value is not a ConstantInt.
5955 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5956 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5957 Version = CI->getZExtValue();
5958 }
5959 }
5960
// NOTE(review): the line opening the block that is closed further down (after
// the malformed-debug-info diagnostic) is not visible in this view; presumably
// it tests Version against the current debug info version - confirm.
5962 bool BrokenDebugInfo = false;
5963 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5964 report_fatal_error("Broken module found, compilation aborted!");
5965 if (!BrokenDebugInfo)
5966 // Everything is ok.
5967 return false;
5968 else {
5969 // Diagnose malformed debug info.
// NOTE(review): the declaration of Diag is not visible in this view.
5971 M.getContext().diagnose(Diag);
5972 }
5973 }
// Reaching this point means the debug info is dropped from the module.
5974 bool Modified = StripDebugInfo(M);
// NOTE(review): the condition opening the block below and the declaration of
// DiagVersion are not visible in this view.
5976 // Diagnose a version mismatch.
5978 M.getContext().diagnose(DiagVersion);
5979 }
5980 return Modified;
5981}
5982
5983static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5984 GlobalValue *GV, const Metadata *V) {
5985 Function *F = cast<Function>(GV);
5986
5987 constexpr StringLiteral DefaultValue = "1";
5988 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5989 unsigned Length = 0;
5990
5991 if (F->hasFnAttribute(Attr)) {
5992 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5993 // parse these elements placing them into Vect3
5994 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5995 for (; Length < 3 && !S.empty(); Length++) {
5996 auto [Part, Rest] = S.split(',');
5997 Vect3[Length] = Part.trim();
5998 S = Rest;
5999 }
6000 }
6001
6002 const unsigned Dim = DimC - 'x';
6003 assert(Dim < 3 && "Unexpected dim char");
6004
6005 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
6006
6007 // local variable required for StringRef in Vect3 to point to.
6008 const std::string VStr = llvm::utostr(VInt);
6009 Vect3[Dim] = VStr;
6010 Length = std::max(Length, Dim + 1);
6011
6012 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
6013 F->addFnAttr(Attr, NewAttr);
6014}
6015
6016static inline bool isXYZ(StringRef S) {
6017 return S == "x" || S == "y" || S == "z";
6018}
6019
// NOTE(review): the first line of this signature is not visible in this view;
// besides V the body uses a global value GV and a string key K.
// Translates one legacy nvvm.annotations key/value pair into its replacement
// on GV. Returns true when the key was recognised and handled, false when the
// caller should keep the pair.
6021 const Metadata *V) {
6022 if (K == "kernel") {
// NOTE(review): one line is not visible here (possibly a guard on V's value).
6024 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
6025 return true;
6026 }
6027 if (K == "align") {
6028 // V is a bitfield specifying two 16-bit values. The alignment value is
6029 // specified in the low 16 bits. The index is specified in the high bits. For the
6030 // index, 0 indicates the return value while higher values correspond to
6031 // each parameter (idx = param + 1).
6032 const uint64_t AlignIdxValuePair =
6033 mdconst::extract<ConstantInt>(V)->getZExtValue();
6034 const unsigned Idx = (AlignIdxValuePair >> 16);
6035 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
6036 cast<Function>(GV)->addAttributeAtIndex(
6037 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
6038 return true;
6039 }
6040 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
6041 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
// NOTE(review): the statement consuming CV is not visible in this view.
6043 return true;
6044 }
6045 if (K == "minctasm") {
6046 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
6047 cast<Function>(GV)->addFnAttr(NVVMAttr::MinCTASm, llvm::utostr(CV));
6048 return true;
6049 }
6050 if (K == "maxnreg") {
6051 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
6052 cast<Function>(GV)->addFnAttr(NVVMAttr::MaxNReg, llvm::utostr(CV));
6053 return true;
6054 }
// For the three per-dimension keys below, consume_front strips the prefix so
// that K is left holding only the dimension suffix checked by isXYZ.
// NOTE(review): the statement executed in each of these branches is not
// visible in this view.
6055 if (K.consume_front("maxntid") && isXYZ(K)) {
6057 return true;
6058 }
6059 if (K.consume_front("reqntid") && isXYZ(K)) {
6061 return true;
6062 }
6063 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
6065 return true;
6066 }
6067 if (K == "grid_constant") {
6068 const auto Attr = Attribute::get(GV->getContext(), NVVMAttr::GridConstant);
6069 for (const auto &Op : cast<MDNode>(V)->operands()) {
6070 // For some reason, the index is 1-based in the metadata. Good thing we're
6071 // able to auto-upgrade it!
6072 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
6073 cast<Function>(GV)->addParamAttr(Index, Attr);
6074 }
6075 return true;
6076 }
6077
// Unknown key: leave it for the caller to preserve.
6078 return false;
6079}
6080
// NOTE(review): the signature line of this definition is not visible in this
// view; the body works on a module M.
// Rewrites the "nvvm.annotations" named metadata: each key/value pair that
// upgradeSingleNVVMAnnotation handles is dropped, and the remaining pairs are
// re-emitted in new nodes.
6082 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
6083 if (!NamedMD)
6084 return;
6085
6086 SmallVector<MDNode *, 8> NewNodes;
// NOTE(review): the declaration of SeenNodes is not visible in this view; it
// is used below to process each distinct node only once.
6088 for (MDNode *MD : NamedMD->operands()) {
6089 if (!SeenNodes.insert(MD).second)
6090 continue;
6091
// Nodes whose first operand is not a GlobalValue are skipped and therefore
// not re-added after clearOperands() below.
// NOTE(review): operand 0 is read before the operand-count assert.
6092 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
6093 if (!GV)
6094 continue;
6095
6096 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
6097
6098 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
6099 // Each nvvm.annotations metadata entry will be of the following form:
6100 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
6101 // start index = 1, to skip the global variable key
6102 // increment = 2, to skip the value for each property-value pairs
6103 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6104 MDString *K = cast<MDString>(MD->getOperand(j));
6105 const MDOperand &V = MD->getOperand(j + 1);
6106 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
6107 if (!Upgraded)
6108 NewOperands.append({K, V});
6109 }
6110
// Only keep a node if at least one pair survived next to the global itself.
6111 if (NewOperands.size() > 1)
6112 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
6113 }
6114
6115 NamedMD->clearOperands();
6116 for (MDNode *N : NewNodes)
6117 NamedMD->addOperand(N);
6118}
6119
6120/// This checks for objc retain release marker which should be upgraded. It
6121/// returns true if module is modified.
// NOTE(review): the signature line of this definition is not visible in this
// view; the body works on a module M.
6123 bool Changed = false;
6124 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
6125 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6126 if (ModRetainReleaseMarker) {
// NOTE(review): operand 0 is read without checking that the named metadata
// has any operands - confirm an empty node cannot reach here.
6127 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
6128 if (Op) {
6129 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
6130 if (ID) {
// A marker made of exactly two '#'-separated parts is rewritten with ';' as
// the separator; any other shape is moved over unchanged.
6131 SmallVector<StringRef, 4> ValueComp;
6132 ID->getString().split(ValueComp, "#");
6133 if (ValueComp.size() == 2) {
6134 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
6135 ID = MDString::get(M.getContext(), NewValue);
6136 }
// The marker becomes a module flag under the same key and the named metadata
// is removed.
6137 M.addModuleFlag(Module::Error, MarkerKey, ID);
6138 M.eraseNamedMetadata(ModRetainReleaseMarker);
6139 Changed = true;
6140 }
6141 }
6142 }
6143 return Changed;
6144}
6145
// NOTE(review): the signature line of this definition is not visible in this
// view; the body works on a module M.
// Replaces direct calls to the listed ARC runtime functions with calls to the
// matching llvm.objc.* intrinsics.
6147 // This lambda converts normal function calls to ARC runtime functions to
6148 // intrinsic calls.
6149 auto UpgradeToIntrinsic = [&](const char *OldFunc,
6150 llvm::Intrinsic::ID IntrinsicFunc) {
6151 Function *Fn = M.getFunction(OldFunc);
6152
6153 if (!Fn)
6154 return;
6155
6156 Function *NewFn =
6157 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
6158
// early-inc iteration: calls are erased while the user list is walked.
6159 for (User *U : make_early_inc_range(Fn->users())) {
// NOTE(review): the declaration of CI (derived from U) is not visible in this
// view. Users that are not direct calls to Fn are left alone.
6161 if (!CI || CI->getCalledFunction() != Fn)
6162 continue;
6163
6164 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
6165 FunctionType *NewFuncTy = NewFn->getFunctionType();
// NOTE(review): the declaration of Args is not visible in this view.
6167
6168 // Don't upgrade the intrinsic if it's not valid to bitcast the return
6169 // value to the return type of the old function.
6170 if (NewFuncTy->getReturnType() != CI->getType() &&
6171 !CastInst::castIsValid(Instruction::BitCast, CI,
6172 NewFuncTy->getReturnType()))
6173 continue;
6174
6175 bool InvalidCast = false;
6176
6177 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
6178 Value *Arg = CI->getArgOperand(I);
6179
6180 // Bitcast argument to the parameter type of the new function if it's
6181 // not a variadic argument.
6182 if (I < NewFuncTy->getNumParams()) {
6183 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
6184 // to the parameter type of the new function.
6185 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
6186 NewFuncTy->getParamType(I))) {
6187 InvalidCast = true;
6188 break;
6189 }
6190 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
6191 }
6192 Args.push_back(Arg);
6193 }
6194
6195 if (InvalidCast)
6196 continue;
6197
6198 // Create a call instruction that calls the new function.
6199 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6200 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
6201 NewCall->takeName(CI);
6202
6203 // Bitcast the return value back to the type of the old call.
6204 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
6205
6206 if (!CI->use_empty())
6207 CI->replaceAllUsesWith(NewRetVal);
6208 CI->eraseFromParent();
6209 }
6210
// The old declaration is only removed once nothing refers to it any more
// (calls skipped above keep it alive).
6211 if (Fn->use_empty())
6212 Fn->eraseFromParent();
6213 };
6214
6215 // Unconditionally convert a call to "clang.arc.use" to a call to
6216 // "llvm.objc.clang.arc.use".
6217 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6218
6219 // Upgrade the retain release marker. If there is no need to upgrade
6220 // the marker, that means either the module is already new enough to contain
6221 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
// NOTE(review): the condition guarding this return is not visible in this
// view.
6223 return;
6224
// Old runtime function name -> replacement intrinsic.
6225 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6226 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6227 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6228 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6229 {"objc_autoreleaseReturnValue",
6230 llvm::Intrinsic::objc_autoreleaseReturnValue},
6231 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6232 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6233 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6234 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6235 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6236 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6237 {"objc_release", llvm::Intrinsic::objc_release},
6238 {"objc_retain", llvm::Intrinsic::objc_retain},
6239 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6240 {"objc_retainAutoreleaseReturnValue",
6241 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6242 {"objc_retainAutoreleasedReturnValue",
6243 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6244 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6245 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6246 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6247 {"objc_unsafeClaimAutoreleasedReturnValue",
6248 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6249 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6250 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6251 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6252 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6253 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6254 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6255 {"objc_arc_annotation_topdown_bbstart",
6256 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6257 {"objc_arc_annotation_topdown_bbend",
6258 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6259 {"objc_arc_annotation_bottomup_bbstart",
6260 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6261 {"objc_arc_annotation_bottomup_bbend",
6262 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6263
6264 for (auto &I : RuntimeFuncs)
6265 UpgradeToIntrinsic(I.first, I.second);
6266}
6267
// NOTE(review): the signature line of this definition is not visible in this
// view; the body works on a module M and returns whether it changed anything.
// Walks the module flags metadata and rewrites individual flags in place
// (behavior, key or value), then appends flags that older modules lack.
6269 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6270 if (!ModFlags)
6271 return false;
6272
6273 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6274 bool HasSwiftVersionFlag = false;
// The Swift* values are only assigned and read when HasSwiftVersionFlag is
// set.
6275 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6276 uint32_t SwiftABIVersion;
6277 auto Int8Ty = Type::getInt8Ty(M.getContext());
6278 auto Int32Ty = Type::getInt32Ty(M.getContext());
6279
6280 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6281 MDNode *Op = ModFlags->getOperand(I);
// Only well-formed <behavior, key, value> triples with a string key are
// considered.
6282 if (Op->getNumOperands() != 3)
6283 continue;
6284 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6285 if (!ID)
6286 continue;
// Helper: replace flag I with a copy that has behavior B and the same key and
// value.
6287 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6288 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6289 Type::getInt32Ty(M.getContext()), B)),
6290 MDString::get(M.getContext(), ID->getString()),
6291 Op->getOperand(2)};
6292 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6293 Changed = true;
6294 };
6295
6296 if (ID->getString() == "Objective-C Image Info Version")
6297 HasObjCFlag = true;
6298 if (ID->getString() == "Objective-C Class Properties")
6299 HasClassProperties = true;
6300 // Upgrade PIC from Error/Max to Min.
6301 if (ID->getString() == "PIC Level") {
6302 if (auto *Behavior =
// NOTE(review): the initializer of Behavior is not visible in this view.
6304 uint64_t V = Behavior->getLimitedValue();
6305 if (V == Module::Error || V == Module::Max)
6306 SetBehavior(Module::Min);
6307 }
6308 }
6309 // Upgrade "PIE Level" from Error to Max.
6310 if (ID->getString() == "PIE Level")
6311 if (auto *Behavior =
// NOTE(review): the initializer of Behavior is not visible in this view.
6313 if (Behavior->getLimitedValue() == Module::Error)
6314 SetBehavior(Module::Max);
6315
6316 // Upgrade branch protection and return address signing module flags. The
6317 // module flag behavior for these fields were Error and now they are Min.
6318 if (ID->getString() == "branch-target-enforcement" ||
6319 ID->getString().starts_with("sign-return-address")) {
6320 if (auto *Behavior =
// NOTE(review): the initializer of Behavior is not visible in this view.
6322 if (Behavior->getLimitedValue() == Module::Error) {
6323 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6324 Metadata *Ops[3] = {
6325 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6326 Op->getOperand(1), Op->getOperand(2)};
6327 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6328 Changed = true;
6329 }
6330 }
6331 }
6332
6333 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
6334 // section name so that llvm-lto will not complain about mismatching
6335 // module flags that are functionally the same.
6336 if (ID->getString() == "Objective-C Image Info Section") {
6337 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
// Splitting on ' ' and concatenating the pieces drops every space.
6338 SmallVector<StringRef, 4> ValueComp;
6339 Value->getString().split(ValueComp, " ");
6340 if (ValueComp.size() != 1) {
6341 std::string NewValue;
6342 for (auto &S : ValueComp)
6343 NewValue += S.str();
6344 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6345 MDString::get(M.getContext(), NewValue)};
6346 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6347 Changed = true;
6348 }
6349 }
6350 }
6351
6352 // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
6353 // If the higher bits are set, it adds new module flag for swift info.
6354 if (ID->getString() == "Objective-C Garbage Collection") {
6355 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6356 if (Md) {
6357 assert(Md->getValue() && "Expected non-empty metadata");
6358 auto Type = Md->getValue()->getType();
// Already an i8 value: nothing to upgrade for this flag.
6359 if (Type == Int8Ty)
6360 continue;
6361 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
// Bits above the low byte carry Swift version information: bits 8-15 ABI
// version, bits 16-23 minor version, bits 24-31 major version.
6362 if ((Val & 0xff) != Val) {
6363 HasSwiftVersionFlag = true;
6364 SwiftABIVersion = (Val & 0xff00) >> 8;
6365 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6366 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6367 }
6368 Metadata *Ops[3] = {
// NOTE(review): the first element of Ops (the behavior operand) is not
// visible in this view.
6370 Op->getOperand(1),
6371 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
6372 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6373 Changed = true;
6374 }
6375 }
6376
// Rename the flag key; behavior and value are kept.
6377 if (ID->getString() == "amdgpu_code_object_version") {
6378 Metadata *Ops[3] = {
6379 Op->getOperand(0),
6380 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6381 Op->getOperand(2)};
6382 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6383 Changed = true;
6384 }
6385 }
6386
6387 // "Objective-C Class Properties" is recently added for Objective-C. We
6388 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
6389 // flag of value 0, so we can correctly downgrade this flag when trying to
6390 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6391 // this module flag.
6392 if (HasObjCFlag && !HasClassProperties) {
6393 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6394 (uint32_t)0);
6395 Changed = true;
6396 }
6397
6398 if (HasSwiftVersionFlag) {
6399 M.addModuleFlag(Module::Error, "Swift ABI Version",
6400 SwiftABIVersion);
6401 M.addModuleFlag(Module::Error, "Swift Major Version",
6402 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6403 M.addModuleFlag(Module::Error, "Swift Minor Version",
6404 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6405 Changed = true;
6406 }
6407
6408 return Changed;
6409}
6410
// NOTE(review): the signature line of this definition (and therefore its
// name) is not visible in this view; the body works on a module M and returns
// whether it changed anything.
// Rewrites every operand of the "cfi.functions" named metadata so that an
// i64 constant is inserted as operand 2, after the name and linkage operands.
6412 NamedMDNode *CFIConsts = M.getNamedMetadata("cfi.functions");
6413 // If this metadata has operands, we expect all of them to be either from
6414 // before or from after the format change handled here, so we can bail out
6415 // fast if the first (if any) operands is of the new format.
// New-format nodes are recognised by an i64 constant at operand index 2.
6416 auto MatchesVersion = [](const MDNode *Op) {
6417 return Op->getNumOperands() >= 3 &&
6418 isa<ConstantAsMetadata>(Op->getOperand(2)) &&
6419 cast<ConstantAsMetadata>(Op->getOperand(2))
6420 ->getType()
6421 ->isIntegerTy(64);
6422 };
6423
6424 if (!CFIConsts || !CFIConsts->getNumOperands() ||
6425 MatchesVersion(CFIConsts->getOperand(0)))
6426 return false;
6427
6428 bool Changed = false;
6429 for (unsigned I = 0, E = CFIConsts->getNumOperands(); I != E; ++I) {
6430 MDNode *Op = CFIConsts->getOperand(I);
6431 assert(!MatchesVersion(Op) && "Unexpected mix of CFIConstant formats");
6432 assert(Op->getNumOperands() >= 2 &&
6433 "Expected at least 2 operands - name and linkage type");
// NOTE(review): dyn_cast may yield null, yet NameMD is dereferenced
// unconditionally on the next line - confirm operand 0 is always an MDString.
6434 MDString *NameMD = dyn_cast<MDString>(Op->getOperand(0));
6435 StringRef Name = NameMD->getString();
// NOTE(review): the lines computing GUID (presumably from Name) and declaring
// Elts are not visible in this view.
6438
6440 Elts.push_back(Op->getOperand(0));
6441 Elts.push_back(Op->getOperand(1));
// NOTE(review): the line that opens this push of the i64 GUID constant is not
// visible in this view.
6443 ConstantInt::get(Type::getInt64Ty(M.getContext()), GUID)));
6444
// Carry over all remaining operands after the inserted one.
6445 for (unsigned J = 2, EJ = Op->getNumOperands(); J != EJ; ++J)
6446 Elts.push_back(Op->getOperand(J));
6447
6448 CFIConsts->setOperand(I, MDNode::get(M.getContext(), Elts));
6449 Changed = true;
6450 }
6451
6452 return Changed;
6453}
6454
// NOTE(review): the signature line of this definition is not visible in this
// view; the body works on a module M.
// Normalises the section name of every global whose section starts with
// "__DATA, __objc_catlist" by trimming whitespace around each comma-separated
// component.
6456 auto TrimSpaces = [](StringRef Section) -> std::string {
6457 SmallVector<StringRef, 5> Components;
6458 Section.split(Components, ',');
6459
6460 SmallString<32> Buffer;
6461 raw_svector_ostream OS(Buffer);
6462
// Every component is written with a leading ','; the very first one is
// dropped again by substr(1) below.
6463 for (auto Component : Components)
6464 OS << ',' << Component.trim();
6465
6466 return std::string(OS.str().substr(1));
6467 };
6468
6469 for (auto &GV : M.globals()) {
6470 if (!GV.hasSection())
6471 continue;
6472
6473 StringRef Section = GV.getSection();
6474
6475 if (!Section.starts_with("__DATA, __objc_catlist"))
6476 continue;
6477
// Example of the rewrite (before, then after):
6478 // __DATA, __objc_catlist, regular, no_dead_strip
6479 // __DATA,__objc_catlist,regular,no_dead_strip
6480 GV.setSection(TrimSpaces(Section));
6481 }
6482}
6483
6484namespace {
6485// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6486// callsites within a function that did not also have the strictfp attribute.
6487// Since 10.0, if strict FP semantics are needed within a function, the
6488// function must have the strictfp attribute and all calls within the function
6489// must also have the strictfp attribute. This latter restriction is
6490// necessary to prevent unwanted libcall simplification when a function is
6491// being cloned (such as for inlining).
6492//
6493// The "dangling" strictfp attribute usage was only used to prevent constant
6494// folding and other libcall simplification. The nobuiltin attribute on the
6495// callsite has the same effect.
6496struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6497 StrictFPUpgradeVisitor() = default;
6498
// Invoked for every call site while visiting a function.
6499 void visitCallBase(CallBase &Call) {
// Call sites that are not strictfp need no change.
6500 if (!Call.isStrictFP())
6501 return;
// NOTE(review): the condition guarding the next return is not visible in this
// view; as shown, the return looks unconditional, which would make the
// attribute rewrite below unreachable.
6503 return;
6504 // If we get here, the caller doesn't have the strictfp attribute
6505 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6506 Call.removeFnAttr(Attribute::StrictFP);
6507 Call.addFnAttr(Attribute::NoBuiltin);
6508 }
6509};
6510
6511/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6512struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6513 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6514 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6515
6516 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6517 if (!RMW.isFloatingPointOperation())
6518 return;
6519
6520 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6521 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6522 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6523 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6524 }
6525};
6526} // namespace
6527
// NOTE(review): the signature line of this definition is not visible in this
// view; the body works on a function F.
// Brings the attributes of F up to date: fixes dangling call-site strictfp,
// drops type-incompatible return/argument attributes, and converts several
// legacy string attributes. Removals and additions are batched in
// AttrsToRemove / AttrsToAdd and applied once at the end.
6529 // If a function definition doesn't have the strictfp attribute,
6530 // convert any callsite strictfp attributes to nobuiltin.
6531 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6532 StrictFPUpgradeVisitor SFPV;
6533 SFPV.visit(F);
6534 }
6535
6536 // Remove all incompatible attributes from function.
6537 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6538 F.getReturnType(), F.getAttributes().getRetAttrs()));
6539 for (auto &Arg : F.args())
6540 Arg.removeAttrs(
6541 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6542
6543 bool AddingAttrs = false, RemovingAttrs = false;
6544 AttrBuilder AttrsToAdd(F.getContext());
6545 AttributeMask AttrsToRemove;
6546
6547 // Older versions of LLVM treated an "implicit-section-name" attribute
6548 // similarly to directly setting the section on a Function.
6549 if (Attribute A = F.getFnAttribute("implicit-section-name");
6550 A.isValid() && A.isStringAttribute()) {
6551 F.setSection(A.getValueAsString());
6552 AttrsToRemove.addAttribute("implicit-section-name");
6553 RemovingAttrs = true;
6554 }
6555
// The string attribute "nooutline" becomes the enum attribute NoOutline.
6556 if (Attribute A = F.getFnAttribute("nooutline");
6557 A.isValid() && A.isStringAttribute()) {
6558 AttrsToRemove.addAttribute("nooutline");
6559 AttrsToAdd.addAttribute(Attribute::NoOutline);
6560 AddingAttrs = RemovingAttrs = true;
6561 }
6562
// A valued "uniform-work-group-size" is removed; only the value "true" is
// re-added, as a valueless attribute.
6563 if (Attribute A = F.getFnAttribute("uniform-work-group-size");
6564 A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
6565 AttrsToRemove.addAttribute("uniform-work-group-size");
6566 RemovingAttrs = true;
6567 if (A.getValueAsString() == "true") {
6568 AttrsToAdd.addAttribute("uniform-work-group-size");
6569 AddingAttrs = true;
6570 }
6571 }
6572
6573 if (!F.empty()) {
6574 // For some reason this is called twice, and the first time is before any
6575 // instructions are loaded into the body.
6576
6577 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6578 A.isValid()) {
6579
6580 if (A.getValueAsBool()) {
6581 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6582 Visitor.visit(F);
6583 }
6584
6585 // We will leave behind dead attribute uses on external declarations, but
6586 // clang never added these to declarations anyway.
6587 AttrsToRemove.addAttribute("amdgpu-unsafe-fp-atomics");
6588 RemovingAttrs = true;
6589 }
6590 }
6591
// Defaults used when only one of the two denormal attributes is present.
6592 DenormalMode DenormalFPMath = DenormalMode::getIEEE();
6593 DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();
6594
6595 bool HandleDenormalMode = false;
6596
// An attribute whose value does not parse to a valid mode is left in place.
6597 if (Attribute Attr = F.getFnAttribute("denormal-fp-math"); Attr.isValid()) {
6598 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6599 if (ParsedMode.isValid()) {
6600 DenormalFPMath = ParsedMode;
6601 AttrsToRemove.addAttribute("denormal-fp-math");
6602 AddingAttrs = RemovingAttrs = true;
6603 HandleDenormalMode = true;
6604 }
6605 }
6606
6607 if (Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
6608 Attr.isValid()) {
6609 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6610 if (ParsedMode.isValid()) {
6611 DenormalFPMathF32 = ParsedMode;
6612 AttrsToRemove.addAttribute("denormal-fp-math-f32");
6613 AddingAttrs = RemovingAttrs = true;
6614 HandleDenormalMode = true;
6615 }
6616 }
6617
// Both parsed modes are folded into a single denormal FP environment
// attribute.
6618 if (HandleDenormalMode)
6619 AttrsToAdd.addDenormalFPEnvAttr(
6620 DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));
6621
// Removals are applied before additions, so a name present in both sets ends
// up present.
6622 if (RemovingAttrs)
6623 F.removeFnAttrs(AttrsToRemove);
6624
6625 if (AddingAttrs)
6626 F.addFnAttrs(AttrsToAdd);
6627}
6628
6629// Check if the function attribute is not present and set it.
// An attribute that already exists is left untouched, whatever its value.
// NOTE(review): the first line of this signature is not visible in this view;
// besides Value the body uses a function F and an attribute name FnAttrName.
6631 StringRef Value) {
6632 if (!F.hasFnAttribute(FnAttrName))
6633 F.addFnAttr(FnAttrName, Value);
6634}
6635
6636// Check if the function attribute is not present and set it if needed.
6637// If the attribute is "false" then removes it.
6638// If the attribute is "true" resets it to a valueless attribute.
6639static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6640 if (!F.hasFnAttribute(FnAttrName)) {
6641 if (Set)
6642 F.addFnAttr(FnAttrName);
6643 } else {
6644 auto A = F.getFnAttribute(FnAttrName);
6645 if ("false" == A.getValueAsString())
6646 F.removeFnAttr(FnAttrName);
6647 else if ("true" == A.getValueAsString()) {
6648 F.removeFnAttr(FnAttrName);
6649 F.addFnAttr(FnAttrName);
6650 }
6651 }
6652}
6653
// NOTE(review): the signature line of this definition is not visible in this
// view; the body works on a module M.
// For ARM, Thumb and AArch64 modules, copies the branch-protection related
// module flags onto every function definition as function attributes, then
// rewrites the flags that were set to the value 2.
6655 Triple T(M.getTargetTriple());
6656 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6657 return;
6658
6659 uint64_t BTEValue = 0;
6660 uint64_t BPPLRValue = 0;
6661 uint64_t GCSValue = 0;
6662 uint64_t SRAValue = 0;
6663 uint64_t SRAALLValue = 0;
6664 uint64_t SRABKeyValue = 0;
6665
6666 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6667 if (ModFlags) {
6668 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6669 MDNode *Op = ModFlags->getOperand(I);
6670 if (Op->getNumOperands() != 3)
6671 continue;
6672
6673 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6674 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6675 if (!ID || !CI)
6676 continue;
6677
// Map the flag name to the local that records its value; unrelated flags map
// to nullptr and are skipped.
6678 StringRef IDStr = ID->getString();
6679 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6680 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6681 : IDStr == "guarded-control-stack" ? &GCSValue
6682 : IDStr == "sign-return-address" ? &SRAValue
6683 : IDStr == "sign-return-address-all" ? &SRAALLValue
6684 : IDStr == "sign-return-address-with-bkey"
6685 ? &SRABKeyValue
6686 : nullptr;
6687 if (!ValPtr)
6688 continue;
6689
// A value of 2 on any of these flags aborts the whole upgrade. 2 is also the
// value written back at the end of this function, so this presumably marks a
// module that was already processed - confirm.
6690 *ValPtr = CI->getZExtValue();
6691 if (*ValPtr == 2)
6692 return;
6693 }
6694 }
6695
// Only the value 1 counts as "enabled".
6696 bool BTE = BTEValue == 1;
6697 bool BPPLR = BPPLRValue == 1;
6698 bool GCS = GCSValue == 1;
6699 bool SRA = SRAValue == 1;
6700
6701 StringRef SignTypeValue = "non-leaf";
6702 if (SRA && SRAALLValue == 1)
6703 SignTypeValue = "all";
6704
6705 StringRef SignKeyValue = "a_key";
6706 if (SRA && SRABKeyValue == 1)
6707 SignKeyValue = "b_key";
6708
6709 for (Function &F : M.getFunctionList()) {
6710 if (F.isDeclaration())
6711 continue;
6712
6713 if (SRA) {
// Function-level settings that already exist take precedence.
6714 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6715 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6716 } else {
// Without the module flag, an explicit "none" is dropped together with the
// key attribute.
6717 if (auto A = F.getFnAttribute("sign-return-address");
6718 A.isValid() && "none" == A.getValueAsString()) {
6719 F.removeFnAttr("sign-return-address");
6720 F.removeFnAttr("sign-return-address-key");
6721 }
6722 }
6723 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6724 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6725 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6726 }
6727
// Rewrite each enabled flag with behavior Min and value 2.
6728 if (BTE)
6729 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6730 if (BPPLR)
6731 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6732 if (GCS)
6733 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6734 if (SRA) {
6735 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6736 if (SRAALLValue == 1)
6737 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6738 if (SRABKeyValue == 1)
6739 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6740 }
6741}
6742
6743static bool isOldLoopArgument(Metadata *MD) {
6744 auto *T = dyn_cast_or_null<MDTuple>(MD);
6745 if (!T)
6746 return false;
6747 if (T->getNumOperands() < 1)
6748 return false;
6749 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6750 if (!S)
6751 return false;
6752 return S->getString().starts_with("llvm.vectorizer.");
6753}
6754
6756 StringRef OldPrefix = "llvm.vectorizer.";
6757 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6758
6759 if (OldTag == "llvm.vectorizer.unroll")
6760 return MDString::get(C, "llvm.loop.interleave.count");
6761
6762 return MDString::get(
6763 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6764 .str());
6765}
6766
6768 auto *T = dyn_cast_or_null<MDTuple>(MD);
6769 if (!T)
6770 return MD;
6771 if (T->getNumOperands() < 1)
6772 return MD;
6773 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6774 if (!OldTag)
6775 return MD;
6776 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6777 return MD;
6778
6779 // This has an old tag. Upgrade it.
6781 Ops.reserve(T->getNumOperands());
6782 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6783 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6784 Ops.push_back(T->getOperand(I));
6785
6786 return MDTuple::get(T->getContext(), Ops);
6787}
6788
6790 auto *T = dyn_cast<MDTuple>(&N);
6791 if (!T)
6792 return &N;
6793
6794 if (none_of(T->operands(), isOldLoopArgument))
6795 return &N;
6796
6798 Ops.reserve(T->getNumOperands());
6799 for (Metadata *MD : T->operands())
6800 Ops.push_back(upgradeLoopArgument(MD));
6801
6802 return MDTuple::get(T->getContext(), Ops);
6803}
6804
6806 Triple T(TT);
6807 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6808 // the address space of globals to 1. This does not apply to SPIRV Logical.
6809 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6810 !DL.contains("-G") && !DL.starts_with("G")) {
6811 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6812 }
6813
6814 if (T.isLoongArch64() || T.isRISCV64()) {
6815 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6816 auto I = DL.find("-n64-");
6817 if (I != StringRef::npos)
6818 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6819 return DL.str();
6820 }
6821
6822 // AMDGPU data layout upgrades.
6823 std::string Res = DL.str();
6824 if (T.isAMDGPU()) {
6825 // Define address spaces for constants.
6826 if (!DL.contains("-G") && !DL.starts_with("G"))
6827 Res.append(Res.empty() ? "G1" : "-G1");
6828
6829 // AMDGCN data layout upgrades.
6830 if (T.isAMDGCN()) {
6831
6832 // Add missing non-integral declarations.
6833 // This goes before adding new address spaces to prevent incoherent string
6834 // values.
6835 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6836 Res.append("-ni:7:8:9");
6837 // Update ni:7 to ni:7:8:9.
6838 if (DL.ends_with("ni:7"))
6839 Res.append(":8:9");
6840 if (DL.ends_with("ni:7:8"))
6841 Res.append(":9");
6842
6843 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6844 // resources) An empty data layout has already been upgraded to G1 by now.
6845 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6846 Res.append("-p7:160:256:256:32");
6847 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6848 Res.append("-p8:128:128:128:48");
6849 constexpr StringRef OldP8("-p8:128:128-");
6850 if (DL.contains(OldP8))
6851 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6852 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6853 Res.append("-p9:192:256:256:32");
6854 }
6855
6856 // Upgrade the ELF mangling mode.
6857 if (!DL.contains("m:e"))
6858 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6859
6860 return Res;
6861 }
6862
6863 if (T.isSystemZ() && !DL.empty()) {
6864 // Make sure the stack alignment is present.
6865 if (!DL.contains("-S64"))
6866 return "E-S64" + DL.drop_front(1).str();
6867 return DL.str();
6868 }
6869
6870 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6871 // If the datalayout matches the expected format, add pointer size address
6872 // spaces to the datalayout.
6873 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6874 if (!DL.contains(AddrSpaces)) {
6876 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6877 if (R.match(Res, &Groups))
6878 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6879 }
6880 };
6881
6882 // AArch64 data layout upgrades.
6883 if (T.isAArch64()) {
6884 // Add "-Fn32"
6885 if (!DL.empty() && !DL.contains("-Fn32"))
6886 Res.append("-Fn32");
6887 AddPtr32Ptr64AddrSpaces();
6888 return Res;
6889 }
6890
6891 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6892 T.isWasm()) {
6893 // Mips64 with o32 ABI did not add "-i128:128".
6894 // Add "-i128:128"
6895 std::string I64 = "-i64:64";
6896 std::string I128 = "-i128:128";
6897 if (!StringRef(Res).contains(I128)) {
6898 size_t Pos = Res.find(I64);
6899 if (Pos != size_t(-1))
6900 Res.insert(Pos + I64.size(), I128);
6901 }
6902 }
6903
6904 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6905 size_t Pos = Res.find("-S128");
6906 if (Pos == StringRef::npos)
6907 Pos = Res.size();
6908 Res.insert(Pos, "-f64:32:64");
6909 }
6910
6911 if (!T.isX86())
6912 return Res;
6913
6914 AddPtr32Ptr64AddrSpaces();
6915
6916 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6917 // for i128 operations prior to this being reflected in the data layout, and
6918 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6919 // boundaries, so although this is a breaking change, the upgrade is expected
6920 // to fix more IR than it breaks.
6921 // Intel MCU is an exception and uses 4-byte-alignment.
6922 if (!T.isOSIAMCU()) {
6923 std::string I128 = "-i128:128";
6924 if (StringRef Ref = Res; !Ref.contains(I128)) {
6926 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6927 if (R.match(Res, &Groups))
6928 Res = (Groups[1] + I128 + Groups[3]).str();
6929 }
6930 }
6931
6932 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6933 // Raising the alignment is safe because Clang did not produce f80 values in
6934 // the MSVC environment before this upgrade was added.
6935 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6936 StringRef Ref = Res;
6937 auto I = Ref.find("-f80:32-");
6938 if (I != StringRef::npos)
6939 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6940 }
6941
6942 return Res;
6943}
6944
6945void llvm::UpgradeAttributes(AttrBuilder &B) {
6946 StringRef FramePointer;
6947 Attribute A = B.getAttribute("no-frame-pointer-elim");
6948 if (A.isValid()) {
6949 // The value can be "true" or "false".
6950 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6951 B.removeAttribute("no-frame-pointer-elim");
6952 }
6953 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6954 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6955 if (FramePointer != "all")
6956 FramePointer = "non-leaf";
6957 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6958 }
6959 if (!FramePointer.empty())
6960 B.addAttribute("frame-pointer", FramePointer);
6961
6962 A = B.getAttribute("null-pointer-is-valid");
6963 if (A.isValid()) {
6964 // The value can be "true" or "false".
6965 bool NullPointerIsValid = A.getValueAsString() == "true";
6966 B.removeAttribute("null-pointer-is-valid");
6967 if (NullPointerIsValid)
6968 B.addAttribute(Attribute::NullPointerIsValid);
6969 }
6970
6971 A = B.getAttribute("uniform-work-group-size");
6972 if (A.isValid()) {
6973 StringRef Val = A.getValueAsString();
6974 if (!Val.empty()) {
6975 bool IsTrue = Val == "true";
6976 B.removeAttribute("uniform-work-group-size");
6977 if (IsTrue)
6978 B.addAttribute("uniform-work-group-size");
6979 }
6980 }
6981}
6982
6983void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6984 // clang.arc.attachedcall bundles are now required to have an operand.
6985 // If they don't, it's okay to drop them entirely: when there is an operand,
6986 // the "attachedcall" is meaningful and required, but without an operand,
6987 // it's just a marker NOP. Dropping it merely prevents an optimization.
6988 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6989 return OBD.getTag() == "clang.arc.attachedcall" &&
6990 OBD.inputs().empty();
6991 });
6992}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define R2(n)
This file contains the declarations for metadata subclasses.
#define T
#define T1
NVPTX address space definition.
uint64_t High
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DWARF expression.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
void setDebugLoc(DebugLoc Loc)
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setApproxFunc(bool B=true)
Definition FMF.h:93
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:869
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:246
const Function & getFunction() const
Definition Function.h:166
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Function.cpp:445
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
Argument * getArg(unsigned i) const
Definition Function.h:886
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:80
LinkageTypes getLinkage() const
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
static StringRef dropLLVMManglingEscape(StringRef Name)
If the given string begins with the GlobalValue name mangling escape character '\1',...
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:629
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2900
Base class for instruction visitors.
Definition InstVisitor.h:78
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1433
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1561
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1439
LLVMContext & getContext() const
Definition Metadata.h:1233
Tracking metadata reference owned by Metadata.
Definition Metadata.h:891
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1518
Metadata wrapper in the Value hierarchy.
Definition Metadata.h:184
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition Module.h:117
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition Module.h:138
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition Module.h:120
@ Min
Takes the min of the two values, which are required to be integers.
Definition Module.h:152
@ Max
Takes the max of the two values, which are required to be integers.
Definition Module.h:149
A tuple of MDNodes.
Definition Metadata.h:1749
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
Definition Metadata.h:1845
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
StringRef getTag() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition Type.cpp:891
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
static constexpr size_t npos
Definition StringRef.h:58
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition StringRef.h:844
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:479
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:310
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:308
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:285
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:286
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:284
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:394
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:346
bool hasName() const
Definition Value.h:261
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:400
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type, and the element type is an integer type of the same width as the input element type.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
LLVM_ABI bool hasStructReturnType(ID id)
Returns true if id has a struct return type.
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
Definition Metadata.h:709
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
@ Length
Definition DWP.cpp:558
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call to the specified new function.
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:328
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading, and returns true if it requires upgrading.
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case of optionals) value is accepted.
Definition Casting.h:732
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the function updated as well as all calls updated to the new function.
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition STLExtras.h:633
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool UpgradeCFIFunctionsMetadata(Module &M)
Upgrade the cfi.functions metadata node by calculating and inserting the GUID for each function entry...
LLVM_ABI void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address spaces: the instruction is replaced by a pair ptrtoint+inttoptr.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction is replaced by a pair ptrtoint+inttoptr.
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to the struct-path aware TBAA format.
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker to new module flag format.
@ DEBUG_METADATA_VERSION
Definition Metadata.h:54
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106