LLVM 23.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>
58
59using namespace llvm;
60
61static cl::opt<bool>
62 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
63 cl::desc("Disable autoupgrade of debug info"));
64
65static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
66
67// Report a fatal error along with the
68// Call Instruction which caused the error
69[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
70 CallBase *CI) {
71 CI->print(llvm::errs());
72 llvm::errs() << "\n";
74}
75
76// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
77// changed their type from v4f32 to v2i64.
79 Function *&NewFn) {
80 // Check whether this is an old version of the function, which received
81 // v4f32 arguments.
82 Type *Arg0Type = F->getFunctionType()->getParamType(0);
83 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
84 return false;
85
86 // Yes, it's old, replace it with new version.
87 rename(F);
88 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
89 return true;
90}
91
92// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
93// arguments have changed their type from i32 to i8.
95 Function *&NewFn) {
96 // Check that the last argument is an i32.
97 Type *LastArgType = F->getFunctionType()->getParamType(
98 F->getFunctionType()->getNumParams() - 1);
99 if (!LastArgType->isIntegerTy(32))
100 return false;
101
102 // Move this function aside and map down.
103 rename(F);
104 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
105 return true;
106}
107
108// Upgrade the declaration of fp compare intrinsics that change return type
109// from scalar to vXi1 mask.
111 Function *&NewFn) {
112 // Check if the return type is a vector.
113 if (F->getReturnType()->isVectorTy())
114 return false;
115
116 rename(F);
117 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
118 return true;
119}
120
121// Upgrade the declaration of multiply and add bytes intrinsics whose input
122// arguments' types have changed from vectors of i32 to vectors of i8
124 Function *&NewFn) {
125 // check if input argument type is a vector of i8
126 Type *Arg1Type = F->getFunctionType()->getParamType(1);
127 Type *Arg2Type = F->getFunctionType()->getParamType(2);
128 if (Arg1Type->isVectorTy() &&
129 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
130 Arg2Type->isVectorTy() &&
131 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
132 return false;
133
134 rename(F);
135 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
136 return true;
137}
138
139// Upgrade the declaration of multipy and add words intrinsics whose input
140// arguments' types have changed to vectors of i32 to vectors of i16
142 Function *&NewFn) {
143 // check if input argument type is a vector of i16
144 Type *Arg1Type = F->getFunctionType()->getParamType(1);
145 Type *Arg2Type = F->getFunctionType()->getParamType(2);
146 if (Arg1Type->isVectorTy() &&
147 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
148 Arg2Type->isVectorTy() &&
149 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
150 return false;
151
152 rename(F);
153 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
154 return true;
155}
156
158 Function *&NewFn) {
159 if (F->getReturnType()->getScalarType()->isBFloatTy())
160 return false;
161
162 rename(F);
163 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
164 return true;
165}
166
168 Function *&NewFn) {
169 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
170 return false;
171
172 rename(F);
173 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
174 return true;
175}
176
178 // All of the intrinsics matches below should be marked with which llvm
179 // version started autoupgrading them. At some point in the future we would
180 // like to use this information to remove upgrade code for some older
181 // intrinsics. It is currently undecided how we will determine that future
182 // point.
183 if (Name.consume_front("avx."))
184 return (Name.starts_with("blend.p") || // Added in 3.7
185 Name == "cvt.ps2.pd.256" || // Added in 3.9
186 Name == "cvtdq2.pd.256" || // Added in 3.9
187 Name == "cvtdq2.ps.256" || // Added in 7.0
188 Name.starts_with("movnt.") || // Added in 3.2
189 Name.starts_with("sqrt.p") || // Added in 7.0
190 Name.starts_with("storeu.") || // Added in 3.9
191 Name.starts_with("vbroadcast.s") || // Added in 3.5
192 Name.starts_with("vbroadcastf128") || // Added in 4.0
193 Name.starts_with("vextractf128.") || // Added in 3.7
194 Name.starts_with("vinsertf128.") || // Added in 3.7
195 Name.starts_with("vperm2f128.") || // Added in 6.0
196 Name.starts_with("vpermil.")); // Added in 3.1
197
198 if (Name.consume_front("avx2."))
199 return (Name == "movntdqa" || // Added in 5.0
200 Name.starts_with("pabs.") || // Added in 6.0
201 Name.starts_with("padds.") || // Added in 8.0
202 Name.starts_with("paddus.") || // Added in 8.0
203 Name.starts_with("pblendd.") || // Added in 3.7
204 Name == "pblendw" || // Added in 3.7
205 Name.starts_with("pbroadcast") || // Added in 3.8
206 Name.starts_with("pcmpeq.") || // Added in 3.1
207 Name.starts_with("pcmpgt.") || // Added in 3.1
208 Name.starts_with("pmax") || // Added in 3.9
209 Name.starts_with("pmin") || // Added in 3.9
210 Name.starts_with("pmovsx") || // Added in 3.9
211 Name.starts_with("pmovzx") || // Added in 3.9
212 Name == "pmul.dq" || // Added in 7.0
213 Name == "pmulu.dq" || // Added in 7.0
214 Name.starts_with("psll.dq") || // Added in 3.7
215 Name.starts_with("psrl.dq") || // Added in 3.7
216 Name.starts_with("psubs.") || // Added in 8.0
217 Name.starts_with("psubus.") || // Added in 8.0
218 Name.starts_with("vbroadcast") || // Added in 3.8
219 Name == "vbroadcasti128" || // Added in 3.7
220 Name == "vextracti128" || // Added in 3.7
221 Name == "vinserti128" || // Added in 3.7
222 Name == "vperm2i128"); // Added in 6.0
223
224 if (Name.consume_front("avx512.")) {
225 if (Name.consume_front("mask."))
226 // 'avx512.mask.*'
227 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
228 Name.starts_with("and.") || // Added in 3.9
229 Name.starts_with("andn.") || // Added in 3.9
230 Name.starts_with("broadcast.s") || // Added in 3.9
231 Name.starts_with("broadcastf32x4.") || // Added in 6.0
232 Name.starts_with("broadcastf32x8.") || // Added in 6.0
233 Name.starts_with("broadcastf64x2.") || // Added in 6.0
234 Name.starts_with("broadcastf64x4.") || // Added in 6.0
235 Name.starts_with("broadcasti32x4.") || // Added in 6.0
236 Name.starts_with("broadcasti32x8.") || // Added in 6.0
237 Name.starts_with("broadcasti64x2.") || // Added in 6.0
238 Name.starts_with("broadcasti64x4.") || // Added in 6.0
239 Name.starts_with("cmp.b") || // Added in 5.0
240 Name.starts_with("cmp.d") || // Added in 5.0
241 Name.starts_with("cmp.q") || // Added in 5.0
242 Name.starts_with("cmp.w") || // Added in 5.0
243 Name.starts_with("compress.b") || // Added in 9.0
244 Name.starts_with("compress.d") || // Added in 9.0
245 Name.starts_with("compress.p") || // Added in 9.0
246 Name.starts_with("compress.q") || // Added in 9.0
247 Name.starts_with("compress.store.") || // Added in 7.0
248 Name.starts_with("compress.w") || // Added in 9.0
249 Name.starts_with("conflict.") || // Added in 9.0
250 Name.starts_with("cvtdq2pd.") || // Added in 4.0
251 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
252 Name == "cvtpd2dq.256" || // Added in 7.0
253 Name == "cvtpd2ps.256" || // Added in 7.0
254 Name == "cvtps2pd.128" || // Added in 7.0
255 Name == "cvtps2pd.256" || // Added in 7.0
256 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
257 Name == "cvtqq2ps.256" || // Added in 9.0
258 Name == "cvtqq2ps.512" || // Added in 9.0
259 Name == "cvttpd2dq.256" || // Added in 7.0
260 Name == "cvttps2dq.128" || // Added in 7.0
261 Name == "cvttps2dq.256" || // Added in 7.0
262 Name.starts_with("cvtudq2pd.") || // Added in 4.0
263 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
264 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
265 Name == "cvtuqq2ps.256" || // Added in 9.0
266 Name == "cvtuqq2ps.512" || // Added in 9.0
267 Name.starts_with("dbpsadbw.") || // Added in 7.0
268 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
269 Name.starts_with("expand.b") || // Added in 9.0
270 Name.starts_with("expand.d") || // Added in 9.0
271 Name.starts_with("expand.load.") || // Added in 7.0
272 Name.starts_with("expand.p") || // Added in 9.0
273 Name.starts_with("expand.q") || // Added in 9.0
274 Name.starts_with("expand.w") || // Added in 9.0
275 Name.starts_with("fpclass.p") || // Added in 7.0
276 Name.starts_with("insert") || // Added in 4.0
277 Name.starts_with("load.") || // Added in 3.9
278 Name.starts_with("loadu.") || // Added in 3.9
279 Name.starts_with("lzcnt.") || // Added in 5.0
280 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
281 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
282 Name.starts_with("movddup") || // Added in 3.9
283 Name.starts_with("move.s") || // Added in 4.0
284 Name.starts_with("movshdup") || // Added in 3.9
285 Name.starts_with("movsldup") || // Added in 3.9
286 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
287 Name.starts_with("or.") || // Added in 3.9
288 Name.starts_with("pabs.") || // Added in 6.0
289 Name.starts_with("packssdw.") || // Added in 5.0
290 Name.starts_with("packsswb.") || // Added in 5.0
291 Name.starts_with("packusdw.") || // Added in 5.0
292 Name.starts_with("packuswb.") || // Added in 5.0
293 Name.starts_with("padd.") || // Added in 4.0
294 Name.starts_with("padds.") || // Added in 8.0
295 Name.starts_with("paddus.") || // Added in 8.0
296 Name.starts_with("palignr.") || // Added in 3.9
297 Name.starts_with("pand.") || // Added in 3.9
298 Name.starts_with("pandn.") || // Added in 3.9
299 Name.starts_with("pavg") || // Added in 6.0
300 Name.starts_with("pbroadcast") || // Added in 6.0
301 Name.starts_with("pcmpeq.") || // Added in 3.9
302 Name.starts_with("pcmpgt.") || // Added in 3.9
303 Name.starts_with("perm.df.") || // Added in 3.9
304 Name.starts_with("perm.di.") || // Added in 3.9
305 Name.starts_with("permvar.") || // Added in 7.0
306 Name.starts_with("pmaddubs.w.") || // Added in 7.0
307 Name.starts_with("pmaddw.d.") || // Added in 7.0
308 Name.starts_with("pmax") || // Added in 4.0
309 Name.starts_with("pmin") || // Added in 4.0
310 Name == "pmov.qd.256" || // Added in 9.0
311 Name == "pmov.qd.512" || // Added in 9.0
312 Name == "pmov.wb.256" || // Added in 9.0
313 Name == "pmov.wb.512" || // Added in 9.0
314 Name.starts_with("pmovsx") || // Added in 4.0
315 Name.starts_with("pmovzx") || // Added in 4.0
316 Name.starts_with("pmul.dq.") || // Added in 4.0
317 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
318 Name.starts_with("pmulh.w.") || // Added in 7.0
319 Name.starts_with("pmulhu.w.") || // Added in 7.0
320 Name.starts_with("pmull.") || // Added in 4.0
321 Name.starts_with("pmultishift.qb.") || // Added in 8.0
322 Name.starts_with("pmulu.dq.") || // Added in 4.0
323 Name.starts_with("por.") || // Added in 3.9
324 Name.starts_with("prol.") || // Added in 8.0
325 Name.starts_with("prolv.") || // Added in 8.0
326 Name.starts_with("pror.") || // Added in 8.0
327 Name.starts_with("prorv.") || // Added in 8.0
328 Name.starts_with("pshuf.b.") || // Added in 4.0
329 Name.starts_with("pshuf.d.") || // Added in 3.9
330 Name.starts_with("pshufh.w.") || // Added in 3.9
331 Name.starts_with("pshufl.w.") || // Added in 3.9
332 Name.starts_with("psll.d") || // Added in 4.0
333 Name.starts_with("psll.q") || // Added in 4.0
334 Name.starts_with("psll.w") || // Added in 4.0
335 Name.starts_with("pslli") || // Added in 4.0
336 Name.starts_with("psllv") || // Added in 4.0
337 Name.starts_with("psra.d") || // Added in 4.0
338 Name.starts_with("psra.q") || // Added in 4.0
339 Name.starts_with("psra.w") || // Added in 4.0
340 Name.starts_with("psrai") || // Added in 4.0
341 Name.starts_with("psrav") || // Added in 4.0
342 Name.starts_with("psrl.d") || // Added in 4.0
343 Name.starts_with("psrl.q") || // Added in 4.0
344 Name.starts_with("psrl.w") || // Added in 4.0
345 Name.starts_with("psrli") || // Added in 4.0
346 Name.starts_with("psrlv") || // Added in 4.0
347 Name.starts_with("psub.") || // Added in 4.0
348 Name.starts_with("psubs.") || // Added in 8.0
349 Name.starts_with("psubus.") || // Added in 8.0
350 Name.starts_with("pternlog.") || // Added in 7.0
351 Name.starts_with("punpckh") || // Added in 3.9
352 Name.starts_with("punpckl") || // Added in 3.9
353 Name.starts_with("pxor.") || // Added in 3.9
354 Name.starts_with("shuf.f") || // Added in 6.0
355 Name.starts_with("shuf.i") || // Added in 6.0
356 Name.starts_with("shuf.p") || // Added in 4.0
357 Name.starts_with("sqrt.p") || // Added in 7.0
358 Name.starts_with("store.b.") || // Added in 3.9
359 Name.starts_with("store.d.") || // Added in 3.9
360 Name.starts_with("store.p") || // Added in 3.9
361 Name.starts_with("store.q.") || // Added in 3.9
362 Name.starts_with("store.w.") || // Added in 3.9
363 Name == "store.ss" || // Added in 7.0
364 Name.starts_with("storeu.") || // Added in 3.9
365 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
366 Name.starts_with("ucmp.") || // Added in 5.0
367 Name.starts_with("unpckh.") || // Added in 3.9
368 Name.starts_with("unpckl.") || // Added in 3.9
369 Name.starts_with("valign.") || // Added in 4.0
370 Name == "vcvtph2ps.128" || // Added in 11.0
371 Name == "vcvtph2ps.256" || // Added in 11.0
372 Name.starts_with("vextract") || // Added in 4.0
373 Name.starts_with("vfmadd.") || // Added in 7.0
374 Name.starts_with("vfmaddsub.") || // Added in 7.0
375 Name.starts_with("vfnmadd.") || // Added in 7.0
376 Name.starts_with("vfnmsub.") || // Added in 7.0
377 Name.starts_with("vpdpbusd.") || // Added in 7.0
378 Name.starts_with("vpdpbusds.") || // Added in 7.0
379 Name.starts_with("vpdpwssd.") || // Added in 7.0
380 Name.starts_with("vpdpwssds.") || // Added in 7.0
381 Name.starts_with("vpermi2var.") || // Added in 7.0
382 Name.starts_with("vpermil.p") || // Added in 3.9
383 Name.starts_with("vpermilvar.") || // Added in 4.0
384 Name.starts_with("vpermt2var.") || // Added in 7.0
385 Name.starts_with("vpmadd52") || // Added in 7.0
386 Name.starts_with("vpshld.") || // Added in 7.0
387 Name.starts_with("vpshldv.") || // Added in 8.0
388 Name.starts_with("vpshrd.") || // Added in 7.0
389 Name.starts_with("vpshrdv.") || // Added in 8.0
390 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
391 Name.starts_with("xor.")); // Added in 3.9
392
393 if (Name.consume_front("mask3."))
394 // 'avx512.mask3.*'
395 return (Name.starts_with("vfmadd.") || // Added in 7.0
396 Name.starts_with("vfmaddsub.") || // Added in 7.0
397 Name.starts_with("vfmsub.") || // Added in 7.0
398 Name.starts_with("vfmsubadd.") || // Added in 7.0
399 Name.starts_with("vfnmsub.")); // Added in 7.0
400
401 if (Name.consume_front("maskz."))
402 // 'avx512.maskz.*'
403 return (Name.starts_with("pternlog.") || // Added in 7.0
404 Name.starts_with("vfmadd.") || // Added in 7.0
405 Name.starts_with("vfmaddsub.") || // Added in 7.0
406 Name.starts_with("vpdpbusd.") || // Added in 7.0
407 Name.starts_with("vpdpbusds.") || // Added in 7.0
408 Name.starts_with("vpdpwssd.") || // Added in 7.0
409 Name.starts_with("vpdpwssds.") || // Added in 7.0
410 Name.starts_with("vpermt2var.") || // Added in 7.0
411 Name.starts_with("vpmadd52") || // Added in 7.0
412 Name.starts_with("vpshldv.") || // Added in 8.0
413 Name.starts_with("vpshrdv.")); // Added in 8.0
414
415 // 'avx512.*'
416 return (Name == "movntdqa" || // Added in 5.0
417 Name == "pmul.dq.512" || // Added in 7.0
418 Name == "pmulu.dq.512" || // Added in 7.0
419 Name.starts_with("broadcastm") || // Added in 6.0
420 Name.starts_with("cmp.p") || // Added in 12.0
421 Name.starts_with("cvtb2mask.") || // Added in 7.0
422 Name.starts_with("cvtd2mask.") || // Added in 7.0
423 Name.starts_with("cvtmask2") || // Added in 5.0
424 Name.starts_with("cvtq2mask.") || // Added in 7.0
425 Name == "cvtusi2sd" || // Added in 7.0
426 Name.starts_with("cvtw2mask.") || // Added in 7.0
427 Name == "kand.w" || // Added in 7.0
428 Name == "kandn.w" || // Added in 7.0
429 Name == "knot.w" || // Added in 7.0
430 Name == "kor.w" || // Added in 7.0
431 Name == "kortestc.w" || // Added in 7.0
432 Name == "kortestz.w" || // Added in 7.0
433 Name.starts_with("kunpck") || // added in 6.0
434 Name == "kxnor.w" || // Added in 7.0
435 Name == "kxor.w" || // Added in 7.0
436 Name.starts_with("padds.") || // Added in 8.0
437 Name.starts_with("pbroadcast") || // Added in 3.9
438 Name.starts_with("prol") || // Added in 8.0
439 Name.starts_with("pror") || // Added in 8.0
440 Name.starts_with("psll.dq") || // Added in 3.9
441 Name.starts_with("psrl.dq") || // Added in 3.9
442 Name.starts_with("psubs.") || // Added in 8.0
443 Name.starts_with("ptestm") || // Added in 6.0
444 Name.starts_with("ptestnm") || // Added in 6.0
445 Name.starts_with("storent.") || // Added in 3.9
446 Name.starts_with("vbroadcast.s") || // Added in 7.0
447 Name.starts_with("vpshld.") || // Added in 8.0
448 Name.starts_with("vpshrd.")); // Added in 8.0
449 }
450
451 if (Name.consume_front("fma."))
452 return (Name.starts_with("vfmadd.") || // Added in 7.0
453 Name.starts_with("vfmsub.") || // Added in 7.0
454 Name.starts_with("vfmsubadd.") || // Added in 7.0
455 Name.starts_with("vfnmadd.") || // Added in 7.0
456 Name.starts_with("vfnmsub.")); // Added in 7.0
457
458 if (Name.consume_front("fma4."))
459 return Name.starts_with("vfmadd.s"); // Added in 7.0
460
461 if (Name.consume_front("sse."))
462 return (Name == "add.ss" || // Added in 4.0
463 Name == "cvtsi2ss" || // Added in 7.0
464 Name == "cvtsi642ss" || // Added in 7.0
465 Name == "div.ss" || // Added in 4.0
466 Name == "mul.ss" || // Added in 4.0
467 Name.starts_with("sqrt.p") || // Added in 7.0
468 Name == "sqrt.ss" || // Added in 7.0
469 Name.starts_with("storeu.") || // Added in 3.9
470 Name == "sub.ss"); // Added in 4.0
471
472 if (Name.consume_front("sse2."))
473 return (Name == "add.sd" || // Added in 4.0
474 Name == "cvtdq2pd" || // Added in 3.9
475 Name == "cvtdq2ps" || // Added in 7.0
476 Name == "cvtps2pd" || // Added in 3.9
477 Name == "cvtsi2sd" || // Added in 7.0
478 Name == "cvtsi642sd" || // Added in 7.0
479 Name == "cvtss2sd" || // Added in 7.0
480 Name == "div.sd" || // Added in 4.0
481 Name == "mul.sd" || // Added in 4.0
482 Name.starts_with("padds.") || // Added in 8.0
483 Name.starts_with("paddus.") || // Added in 8.0
484 Name.starts_with("pcmpeq.") || // Added in 3.1
485 Name.starts_with("pcmpgt.") || // Added in 3.1
486 Name == "pmaxs.w" || // Added in 3.9
487 Name == "pmaxu.b" || // Added in 3.9
488 Name == "pmins.w" || // Added in 3.9
489 Name == "pminu.b" || // Added in 3.9
490 Name == "pmulu.dq" || // Added in 7.0
491 Name.starts_with("pshuf") || // Added in 3.9
492 Name.starts_with("psll.dq") || // Added in 3.7
493 Name.starts_with("psrl.dq") || // Added in 3.7
494 Name.starts_with("psubs.") || // Added in 8.0
495 Name.starts_with("psubus.") || // Added in 8.0
496 Name.starts_with("sqrt.p") || // Added in 7.0
497 Name == "sqrt.sd" || // Added in 7.0
498 Name == "storel.dq" || // Added in 3.9
499 Name.starts_with("storeu.") || // Added in 3.9
500 Name == "sub.sd"); // Added in 4.0
501
502 if (Name.consume_front("sse41."))
503 return (Name.starts_with("blendp") || // Added in 3.7
504 Name == "movntdqa" || // Added in 5.0
505 Name == "pblendw" || // Added in 3.7
506 Name == "pmaxsb" || // Added in 3.9
507 Name == "pmaxsd" || // Added in 3.9
508 Name == "pmaxud" || // Added in 3.9
509 Name == "pmaxuw" || // Added in 3.9
510 Name == "pminsb" || // Added in 3.9
511 Name == "pminsd" || // Added in 3.9
512 Name == "pminud" || // Added in 3.9
513 Name == "pminuw" || // Added in 3.9
514 Name.starts_with("pmovsx") || // Added in 3.8
515 Name.starts_with("pmovzx") || // Added in 3.9
516 Name == "pmuldq"); // Added in 7.0
517
518 if (Name.consume_front("sse42."))
519 return Name == "crc32.64.8"; // Added in 3.4
520
521 if (Name.consume_front("sse4a."))
522 return Name.starts_with("movnt."); // Added in 3.9
523
524 if (Name.consume_front("ssse3."))
525 return (Name == "pabs.b.128" || // Added in 6.0
526 Name == "pabs.d.128" || // Added in 6.0
527 Name == "pabs.w.128"); // Added in 6.0
528
529 if (Name.consume_front("xop."))
530 return (Name == "vpcmov" || // Added in 3.8
531 Name == "vpcmov.256" || // Added in 5.0
532 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
533 Name.starts_with("vprot")); // Added in 8.0
534
535 return (Name == "addcarry.u32" || // Added in 8.0
536 Name == "addcarry.u64" || // Added in 8.0
537 Name == "addcarryx.u32" || // Added in 8.0
538 Name == "addcarryx.u64" || // Added in 8.0
539 Name == "subborrow.u32" || // Added in 8.0
540 Name == "subborrow.u64" || // Added in 8.0
541 Name.starts_with("vcvtph2ps.")); // Added in 11.0
542}
543
545 Function *&NewFn) {
546 // Only handle intrinsics that start with "x86.".
547 if (!Name.consume_front("x86."))
548 return false;
549
550 if (shouldUpgradeX86Intrinsic(F, Name)) {
551 NewFn = nullptr;
552 return true;
553 }
554
555 if (Name == "rdtscp") { // Added in 8.0
556 // If this intrinsic has 0 operands, it's the new version.
557 if (F->getFunctionType()->getNumParams() == 0)
558 return false;
559
560 rename(F);
561 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
562 Intrinsic::x86_rdtscp);
563 return true;
564 }
565
567
568 // SSE4.1 ptest functions may have an old signature.
569 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
571 .Case("c", Intrinsic::x86_sse41_ptestc)
572 .Case("z", Intrinsic::x86_sse41_ptestz)
573 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
576 return upgradePTESTIntrinsic(F, ID, NewFn);
577
578 return false;
579 }
580
581 // Several blend and other instructions with masks used the wrong number of
582 // bits.
583
584 // Added in 3.6
586 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
587 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
588 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
589 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
590 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
591 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
594 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
595
596 if (Name.consume_front("avx512.")) {
597 if (Name.consume_front("mask.cmp.")) {
598 // Added in 7.0
600 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
608 return upgradeX86MaskedFPCompare(F, ID, NewFn);
609 } else if (Name.starts_with("vpdpbusd.") ||
610 Name.starts_with("vpdpbusds.")) {
611 // Added in 21.1
613 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
614 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
615 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
616 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
617 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
618 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
621 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
622 } else if (Name.starts_with("vpdpwssd.") ||
623 Name.starts_with("vpdpwssds.")) {
624 // Added in 21.1
626 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
627 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
628 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
629 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
630 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
631 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
634 return upgradeX86MultiplyAddWords(F, ID, NewFn);
635 }
636 return false; // No other 'x86.avx512.*'.
637 }
638
639 if (Name.consume_front("avx2.")) {
640 if (Name.consume_front("vpdpb")) {
641 // Added in 21.1
643 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
644 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
645 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
646 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
647 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
648 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
649 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
650 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
651 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
652 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
653 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
654 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
657 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
658 } else if (Name.consume_front("vpdpw")) {
659 // Added in 21.1
661 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
662 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
663 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
664 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
665 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
666 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
667 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
668 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
669 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
670 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
671 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
672 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
675 return upgradeX86MultiplyAddWords(F, ID, NewFn);
676 }
677 return false; // No other 'x86.avx2.*'
678 }
679
680 if (Name.consume_front("avx10.")) {
681 if (Name.consume_front("vpdpb")) {
682 // Added in 21.1
684 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
685 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
686 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
687 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
688 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
689 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
692 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
693 } else if (Name.consume_front("vpdpw")) {
695 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
696 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
697 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
698 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
699 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
700 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
703 return upgradeX86MultiplyAddWords(F, ID, NewFn);
704 }
705 return false; // No other 'x86.avx10.*'
706 }
707
708 if (Name.consume_front("avx512bf16.")) {
709 // Added in 9.0
711 .Case("cvtne2ps2bf16.128",
712 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .Case("cvtne2ps2bf16.256",
714 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .Case("cvtne2ps2bf16.512",
716 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .Case("mask.cvtneps2bf16.128",
718 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .Case("cvtneps2bf16.256",
720 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .Case("cvtneps2bf16.512",
722 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
725 return upgradeX86BF16Intrinsic(F, ID, NewFn);
726
727 // Added in 9.0
729 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
734 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
735 return false; // No other 'x86.avx512bf16.*'.
736 }
737
738 if (Name.consume_front("xop.")) {
740 if (Name.starts_with("vpermil2")) { // Added in 3.9
741 // Upgrade any XOP PERMIL2 index operand still using a float/double
742 // vector.
743 auto Idx = F->getFunctionType()->getParamType(2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
753 else
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
755 }
756 } else if (F->arg_size() == 2)
757 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
759 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
760 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
762
764 rename(F);
765 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
766 return true;
767 }
768 return false; // No other 'x86.xop.*'
769 }
770
771 if (Name == "seh.recoverfp") {
772 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
773 Intrinsic::eh_recoverfp);
774 return true;
775 }
776
777 return false;
778}
779
780// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
781// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
783 StringRef Name,
784 Function *&NewFn) {
785 if (Name.starts_with("rbit")) {
786 // '(arm|aarch64).rbit'.
788 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
789 return true;
790 }
791
792 if (Name == "thread.pointer") {
793 // '(arm|aarch64).thread.pointer'.
795 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
796 return true;
797 }
798
799 bool Neon = Name.consume_front("neon.");
800 if (Neon) {
801 // '(arm|aarch64).neon.*'.
802 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
803 // v16i8 respectively.
804 if (Name.consume_front("bfdot.")) {
805 // (arm|aarch64).neon.bfdot.*'.
808 .Cases({"v2f32.v8i8", "v4f32.v16i8"},
809 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
810 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
813 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
814 assert((OperandWidth == 64 || OperandWidth == 128) &&
815 "Unexpected operand width");
816 LLVMContext &Ctx = F->getParent()->getContext();
817 std::array<Type *, 2> Tys{
818 {F->getReturnType(),
819 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
820 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
821 return true;
822 }
823 return false; // No other '(arm|aarch64).neon.bfdot.*'.
824 }
825
826 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
827 // anymore and accept v8bf16 instead of v16i8.
828 if (Name.consume_front("bfm")) {
829 // (arm|aarch64).neon.bfm*'.
830 if (Name.consume_back(".v4f32.v16i8")) {
831 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
834 .Case("mla",
835 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
836 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
837 .Case("lalb",
838 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
839 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
840 .Case("lalt",
841 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
842 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
845 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
846 return true;
847 }
848 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
849 }
850 return false; // No other '(arm|aarch64).neon.bfm*.
851 }
852 // Continue on to Aarch64 Neon or Arm Neon.
853 }
854 // Continue on to Arm or Aarch64.
855
856 if (IsArm) {
857 // 'arm.*'.
858 if (Neon) {
859 // 'arm.neon.*'.
861 .StartsWith("vclz.", Intrinsic::ctlz)
862 .StartsWith("vcnt.", Intrinsic::ctpop)
863 .StartsWith("vqadds.", Intrinsic::sadd_sat)
864 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
865 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
866 .StartsWith("vqsubu.", Intrinsic::usub_sat)
867 .StartsWith("vrinta.", Intrinsic::round)
868 .StartsWith("vrintn.", Intrinsic::roundeven)
869 .StartsWith("vrintm.", Intrinsic::floor)
870 .StartsWith("vrintp.", Intrinsic::ceil)
871 .StartsWith("vrintx.", Intrinsic::rint)
872 .StartsWith("vrintz.", Intrinsic::trunc)
875 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
876 F->arg_begin()->getType());
877 return true;
878 }
879
880 if (Name.consume_front("vst")) {
881 // 'arm.neon.vst*'.
882 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
884 if (vstRegex.match(Name, &Groups)) {
885 static const Intrinsic::ID StoreInts[] = {
886 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
887 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
888
889 static const Intrinsic::ID StoreLaneInts[] = {
890 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
891 Intrinsic::arm_neon_vst4lane};
892
893 auto fArgs = F->getFunctionType()->params();
894 Type *Tys[] = {fArgs[0], fArgs[1]};
895 if (Groups[1].size() == 1)
897 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
898 else
900 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
901 return true;
902 }
903 return false; // No other 'arm.neon.vst*'.
904 }
905
906 return false; // No other 'arm.neon.*'.
907 }
908
909 if (Name.consume_front("mve.")) {
910 // 'arm.mve.*'.
911 if (Name == "vctp64") {
912 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
913 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
914 // the function and deal with it below in UpgradeIntrinsicCall.
915 rename(F);
916 return true;
917 }
918 return false; // Not 'arm.mve.vctp64'.
919 }
920
921 if (Name.starts_with("vrintn.v")) {
923 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
924 return true;
925 }
926
927 // These too are changed to accept a v2i1 instead of the old v4i1.
928 if (Name.consume_back(".v4i1")) {
929 // 'arm.mve.*.v4i1'.
930 if (Name.consume_back(".predicated.v2i64.v4i32"))
931 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
932 return Name == "mull.int" || Name == "vqdmull";
933
934 if (Name.consume_back(".v2i64")) {
935 // 'arm.mve.*.v2i64.v4i1'
936 bool IsGather = Name.consume_front("vldr.gather.");
937 if (IsGather || Name.consume_front("vstr.scatter.")) {
938 if (Name.consume_front("base.")) {
939 // Optional 'wb.' prefix.
940 Name.consume_front("wb.");
941 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
942 // predicated.v2i64.v2i64.v4i1'.
943 return Name == "predicated.v2i64";
944 }
945
946 if (Name.consume_front("offset.predicated."))
947 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
948 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
949
950 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
951 return false;
952 }
953
954 return false; // No other 'arm.mve.*.v2i64.v4i1'.
955 }
956 return false; // No other 'arm.mve.*.v4i1'.
957 }
958 return false; // No other 'arm.mve.*'.
959 }
960
961 if (Name.consume_front("cde.vcx")) {
962 // 'arm.cde.vcx*'.
963 if (Name.consume_back(".predicated.v2i64.v4i1"))
964 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
965 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
966 Name == "3q" || Name == "3qa";
967
968 return false; // No other 'arm.cde.vcx*'.
969 }
970 } else {
971 // 'aarch64.*'.
972 if (Neon) {
973 // 'aarch64.neon.*'.
975 .StartsWith("frintn", Intrinsic::roundeven)
976 .StartsWith("rbit", Intrinsic::bitreverse)
979 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
980 F->arg_begin()->getType());
981 return true;
982 }
983
984 if (Name.starts_with("addp")) {
985 // 'aarch64.neon.addp*'.
986 if (F->arg_size() != 2)
987 return false; // Invalid IR.
988 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
989 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
991 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
992 return true;
993 }
994 }
995
996 // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
997 if (Name.starts_with("bfcvt")) {
998 NewFn = nullptr;
999 return true;
1000 }
1001
1002 return false; // No other 'aarch64.neon.*'.
1003 }
1004 if (Name.consume_front("sve.")) {
1005 // 'aarch64.sve.*'.
1006 if (Name.consume_front("bf")) {
1007 if (Name == "mmla") {
1008 Type *Tys[] = {F->getReturnType(),
1009 std::next(F->arg_begin())->getType()};
1011 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1012 return true;
1013 }
1014 if (Name.consume_back(".lane")) {
1015 // 'aarch64.sve.bf*.lane'.
1018 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1019 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1020 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1023 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1024 return true;
1025 }
1026 return false; // No other 'aarch64.sve.bf*.lane'.
1027 }
1028 return false; // No other 'aarch64.sve.bf*'.
1029 }
1030
1031 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
1032 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1033 NewFn = nullptr;
1034 return true;
1035 }
1036
1037 if (Name.consume_front("addqv")) {
1038 // 'aarch64.sve.addqv'.
1039 if (!F->getReturnType()->isFPOrFPVectorTy())
1040 return false;
1041
1042 auto Args = F->getFunctionType()->params();
1043 Type *Tys[] = {F->getReturnType(), Args[1]};
1045 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1046 return true;
1047 }
1048
1049 if (Name.consume_front("ld")) {
1050 // 'aarch64.sve.ld*'.
1051 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1052 if (LdRegex.match(Name)) {
1053 Type *ScalarTy =
1054 cast<VectorType>(F->getReturnType())->getElementType();
1055 ElementCount EC =
1056 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
1057 assert(F->arg_size() == 2 &&
1058 "Expected 2 arguments for ld* intrinsic.");
1059 Type *PtrTy = F->getArg(1)->getType();
1060 Type *Ty = VectorType::get(ScalarTy, EC);
1061 static const Intrinsic::ID LoadIDs[] = {
1062 Intrinsic::aarch64_sve_ld2_sret,
1063 Intrinsic::aarch64_sve_ld3_sret,
1064 Intrinsic::aarch64_sve_ld4_sret,
1065 };
1067 F->getParent(), LoadIDs[Name[0] - '2'], {Ty, PtrTy});
1068 return true;
1069 }
1070 return false; // No other 'aarch64.sve.ld*'.
1071 }
1072
1073 if (Name.consume_front("tuple.")) {
1074 // 'aarch64.sve.tuple.*'.
1075 if (Name.starts_with("get")) {
1076 // 'aarch64.sve.tuple.get*'.
1077 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1079 F->getParent(), Intrinsic::vector_extract, Tys);
1080 return true;
1081 }
1082
1083 if (Name.starts_with("set")) {
1084 // 'aarch64.sve.tuple.set*'.
1085 auto Args = F->getFunctionType()->params();
1086 Type *Tys[] = {Args[0], Args[2], Args[1]};
1088 F->getParent(), Intrinsic::vector_insert, Tys);
1089 return true;
1090 }
1091
1092 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1093 if (CreateTupleRegex.match(Name)) {
1094 // 'aarch64.sve.tuple.create*'.
1095 auto Args = F->getFunctionType()->params();
1096 Type *Tys[] = {F->getReturnType(), Args[1]};
1098 F->getParent(), Intrinsic::vector_insert, Tys);
1099 return true;
1100 }
1101 return false; // No other 'aarch64.sve.tuple.*'.
1102 }
1103
1104 if (Name.starts_with("rev.nxv")) {
1105 // 'aarch64.sve.rev.<Ty>'
1107 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
1108 return true;
1109 }
1110
1111 return false; // No other 'aarch64.sve.*'.
1112 }
1113 }
1114 return false; // No other 'arm.*', 'aarch64.*'.
1115}
1116
// Map an 'nvvm.cp.async.bulk.tensor.g2s.*' name to the intrinsic ID that may
// need upgrading, or return the switch result unchanged when no upgrade
// applies. The visible code upgrades when (1) the first pointer argument is
// still in the old shared address space, or (2) the argument list lacks the
// newer trailing i32 cta_group flag.
// NOTE(review): the signature line (1117) and a few interior lines (e.g. the
// StringSwitch head and the early-return guard on 1135, the address-space
// constant on 1142 and the final return on 1161) are missing from this
// extraction.
1118                                                        StringRef Name) {
1119  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
1122            .Case("im2col.3d",
1123                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1124            .Case("im2col.4d",
1125                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1126            .Case("im2col.5d",
1127                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1128            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1129            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1130            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1131            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1132            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1134
1136      return ID;
1137
1138    // These intrinsics may need upgrade for two reasons:
1139    // (1) When the address-space of the first argument is shared[AS=3]
1140    // (and we upgrade it to use shared_cluster address-space[AS=7])
1141    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1143      return ID;
1144
1145    // (2) When there are only two boolean flag arguments at the end:
1146    //
1147    // The last three parameters of the older version of these
1148    // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1149    //
1150    // The newer version reads as:
1151    // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1152    //
1153    // So, when the type of the [N-3]rd argument is "not i1", then
1154    // it is the older version and we need to upgrade.
1155    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1156    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
1157    if (!ArgType->isIntegerTy(1))
1158      return ID;
1159  }
1160
1162}
1163
// Map 'nvvm.mapa.shared.cluster' and 'nvvm.cp.async.bulk.*' names to the
// intrinsic ID that needs upgrading when the relevant pointer (return value
// or first argument) still uses the old shared address space.
// NOTE(review): the signature line (1164), the address-space constant lines
// (1168, 1182) and the final return (1186) are missing from this extraction.
1165                                                             StringRef Name) {
1166  if (Name.consume_front("mapa.shared.cluster"))
1167    if (F->getReturnType()->getPointerAddressSpace() ==
1169      return Intrinsic::nvvm_mapa_shared_cluster;
1170
1171  if (Name.consume_front("cp.async.bulk.")) {
1174            .Case("global.to.shared.cluster",
1175                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1176            .Case("shared.cta.to.cluster",
1177                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1179
1181      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1183        return ID;
1184  }
1185
1187}
1188
// Map legacy NVVM bf16/bf16x2 math intrinsic names (fma.rn.*, fmax.*,
// fmin.*, neg.*) to their current intrinsic IDs. Callers use the result to
// decide whether a return-type adjustment upgrade is needed.
// NOTE(review): the '.Default(Intrinsic::not_intrinsic)' terminator lines of
// each StringSwitch (1196, 1220, 1244, 1250) and the final return (1252) are
// missing from this extraction.
1190  if (Name.consume_front("fma.rn."))
1191    return StringSwitch<Intrinsic::ID>(Name)
1192        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1193        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1194        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1195        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1197
1198  if (Name.consume_front("fmax."))
1199    return StringSwitch<Intrinsic::ID>(Name)
1200        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1201        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1202        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1203        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1204        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1205        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1206        .Case("ftz.nan.xorsign.abs.bf16",
1207              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1208        .Case("ftz.nan.xorsign.abs.bf16x2",
1209              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1210        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1211        .Case("ftz.xorsign.abs.bf16x2",
1212              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1213        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1214        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1215        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1216        .Case("nan.xorsign.abs.bf16x2",
1217              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1218        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1219        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1221
1222  if (Name.consume_front("fmin."))
1223    return StringSwitch<Intrinsic::ID>(Name)
1224        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1225        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1226        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1227        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1228        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1229        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1230        .Case("ftz.nan.xorsign.abs.bf16",
1231              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1232        .Case("ftz.nan.xorsign.abs.bf16x2",
1233              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1234        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1235        .Case("ftz.xorsign.abs.bf16x2",
1236              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1237        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1238        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1239        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1240        .Case("nan.xorsign.abs.bf16x2",
1241              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1242        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1243        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1245
1246  if (Name.consume_front("neg."))
1247    return StringSwitch<Intrinsic::ID>(Name)
1248        .Case("bf16", Intrinsic::nvvm_neg_bf16)
1249        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1251
1253}
1254
// Consume one NVVM address-space token ("local", "shared", "global",
// "constant" or "param") from the front of Name, returning true on a match.
// Note: consume_front mutates Name, so callers can keep parsing the suffix.
// NOTE(review): the signature line (1255) is missing from this extraction.
1256  return Name.consume_front("local") || Name.consume_front("shared") ||
1257         Name.consume_front("global") || Name.consume_front("constant") ||
1258         Name.consume_front("param");
1259}
1260
// Check whether a 'llvm.convert.to.fp16*' / 'llvm.convert.from.fp16*'
// function has types that the upgrade can legally expand: to.fp16 requires a
// valid FPTrunc(param -> half) followed by BitCast(half -> return); from.fp16
// requires BitCast(param -> half) followed by FPExt(half -> return). Any
// other name yields false.
// NOTE(review): the signature line (1261) is missing from this extraction.
1262                                      const FunctionType *FuncTy) {
1263  Type *HalfTy = Type::getHalfTy(FuncTy->getContext());
1264  if (Name.starts_with("to.fp16")) {
1265    return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0),
1266                                 HalfTy) &&
1267           CastInst::castIsValid(Instruction::BitCast, HalfTy,
1268                                 FuncTy->getReturnType());
1269  }
1270
1271  if (Name.starts_with("from.fp16")) {
1272    return CastInst::castIsValid(Instruction::BitCast, FuncTy->getParamType(0),
1273                                 HalfTy) &&
1274           CastInst::castIsValid(Instruction::FPExt, HalfTy,
1275                                 FuncTy->getReturnType());
1276  }
1277
1278  return false;
1279}
1280
// Core upgrade detector: given a function whose name starts with "llvm.",
// decide whether it is a deprecated intrinsic. Returns true when an upgrade
// is needed; NewFn then holds the replacement declaration, or is null when
// the call itself is rewritten later in UpgradeIntrinsicCall. Dispatches on
// the first character after the "llvm." prefix to keep matching cheap.
// NOTE(review): this extraction is missing scattered lines (the signature on
// 1281, several StringSwitch heads, getOrInsertDeclaration heads, and the
// remangleIntrinsicFunction call on 1889 that computes 'Result') — comments
// below describe only the visible code.
1282                                      bool CanUpgradeDebugIntrinsicsToRecords) {
1283  assert(F && "Illegal to upgrade a non-existent Function.");
1284
1285  StringRef Name = F->getName();
1286
1287  // Quickly eliminate it, if it's not a candidate.
1288  if (!Name.consume_front("llvm.") || Name.empty())
1289    return false;
1290
1291  switch (Name[0]) {
1292  default: break;
1293  case 'a': {
1294    bool IsArm = Name.consume_front("arm.");
1295    if (IsArm || Name.consume_front("aarch64.")) {
1296      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1297        return true;
1298      break;
1299    }
1300
1301    if (Name.consume_front("amdgcn.")) {
1302      if (Name == "alignbit") {
1303        // Target specific intrinsic became redundant
1305            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1306        return true;
1307      }
1308
1309      if (Name.consume_front("atomic.")) {
1310        if (Name.starts_with("inc") || Name.starts_with("dec") ||
1311            Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1312          // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1313          // and usub_sat so there's no new declaration.
1314          NewFn = nullptr;
1315          return true;
1316        }
1317        break; // No other 'amdgcn.atomic.*'
1318      }
1319
1320      switch (F->getIntrinsicID()) {
1321      default:
1322        break;
1323      // Legacy wmma iu intrinsics without the optional clamp operand.
1324      case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1325        if (F->arg_size() == 7) {
1326          NewFn = nullptr;
1327          return true;
1328        }
1329        break;
1330      case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1331      case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1332      case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1333      case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1334      case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1335      case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1336      case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1337        if (F->arg_size() == 8) {
1338          NewFn = nullptr;
1339          return true;
1340        }
1341        break;
1342      }
1343
1344      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1345          Name.consume_front("flat.atomic.")) {
1346        if (Name.starts_with("fadd") ||
1347            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1348            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1349            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1350          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1351          // declaration.
1352          NewFn = nullptr;
1353          return true;
1354        }
1355      }
1356
1357      if (Name.starts_with("ldexp.")) {
1358        // Target specific intrinsic became redundant
1360            F->getParent(), Intrinsic::ldexp,
1361            {F->getReturnType(), F->getArg(1)->getType()});
1362        return true;
1363      }
1364      break; // No other 'amdgcn.*'
1365    }
1366
1367    break;
1368  }
1369  case 'c': {
1370    if (F->arg_size() == 1) {
1371      if (Name.consume_front("convert.")) {
1372        if (convertIntrinsicValidType(Name, F->getFunctionType())) {
          // Expanded to an FPTrunc/FPExt + BitCast pair in
          // UpgradeIntrinsicCall; no replacement declaration.
1373          NewFn = nullptr;
1374          return true;
1375        }
1376      }
1377
1379                              .StartsWith("ctlz.", Intrinsic::ctlz)
1380                              .StartsWith("cttz.", Intrinsic::cttz)
1383        rename(F);
1384        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1385                                                  F->arg_begin()->getType());
1386        return true;
1387      }
1388    }
1389
1390    if (F->arg_size() == 2 && Name == "coro.end") {
1391      rename(F);
1392      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1393                                                Intrinsic::coro_end);
1394      return true;
1395    }
1396
1397    break;
1398  }
1399  case 'd':
1400    if (Name.consume_front("dbg.")) {
1401      // Mark debug intrinsics for upgrade to new debug format.
1402      if (CanUpgradeDebugIntrinsicsToRecords) {
1403        if (Name == "addr" || Name == "value" || Name == "assign" ||
1404            Name == "declare" || Name == "label") {
1405          // There's no function to replace these with.
1406          NewFn = nullptr;
1407          // But we do want these to get upgraded.
1408          return true;
1409        }
1410      }
1411      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1412      // converted to DbgVariableRecords later.
1413      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1414        rename(F);
1415        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1416                                                  Intrinsic::dbg_value);
1417        return true;
1418      }
1419      break; // No other 'dbg.*'.
1420    }
1421    break;
1422  case 'e':
1423    if (Name.consume_front("experimental.vector.")) {
1426            // Skip over extract.last.active, otherwise it will be 'upgraded'
1427            // to a regular vector extract which is a different operation.
1428            .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1429            .StartsWith("extract.", Intrinsic::vector_extract)
1430            .StartsWith("insert.", Intrinsic::vector_insert)
1431            .StartsWith("reverse.", Intrinsic::vector_reverse)
1432            .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1433            .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1434            .StartsWith("partial.reduce.add",
1435                        Intrinsic::vector_partial_reduce_add)
      // Build the overload type list for the replacement declaration; which
      // types participate depends on the chosen intrinsic.
1438      const auto *FT = F->getFunctionType();
1440      if (ID == Intrinsic::vector_extract ||
1441          ID == Intrinsic::vector_interleave2)
1442        // Extracting overloads the return type.
1443        Tys.push_back(FT->getReturnType());
1444      if (ID != Intrinsic::vector_interleave2)
1445        Tys.push_back(FT->getParamType(0));
1446      if (ID == Intrinsic::vector_insert ||
1447          ID == Intrinsic::vector_partial_reduce_add)
1448        // Inserting overloads the inserted type.
1449        Tys.push_back(FT->getParamType(1));
1450      rename(F);
1451      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1452      return true;
1453    }
1454
1455    if (Name.consume_front("reduce.")) {
1457      static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1458      if (R.match(Name, &Groups))
1460                 .Case("add", Intrinsic::vector_reduce_add)
1461                 .Case("mul", Intrinsic::vector_reduce_mul)
1462                 .Case("and", Intrinsic::vector_reduce_and)
1463                 .Case("or", Intrinsic::vector_reduce_or)
1464                 .Case("xor", Intrinsic::vector_reduce_xor)
1465                 .Case("smax", Intrinsic::vector_reduce_smax)
1466                 .Case("smin", Intrinsic::vector_reduce_smin)
1467                 .Case("umax", Intrinsic::vector_reduce_umax)
1468                 .Case("umin", Intrinsic::vector_reduce_umin)
1469                 .Case("fmax", Intrinsic::vector_reduce_fmax)
1470                 .Case("fmin", Intrinsic::vector_reduce_fmin)
1472
      // The 'v2.' forms (ordered fadd/fmul reductions) carry an extra start
      // value, hence the V2 flag steering the overload argument below.
1473      bool V2 = false;
1475        static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1476        Groups.clear();
1477        V2 = true;
1478        if (R2.match(Name, &Groups))
1480                   .Case("fadd", Intrinsic::vector_reduce_fadd)
1481                   .Case("fmul", Intrinsic::vector_reduce_fmul)
1483      }
1485        rename(F);
1486        auto Args = F->getFunctionType()->params();
1487        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1488                                                  {Args[V2 ? 1 : 0]});
1489        return true;
1490      }
1491      break; // No other 'experimental.vector.reduce.*'.
1492    }
1493
1494    if (Name.consume_front("splice"))
1495      return true;
1496    break; // No other 'experimental.vector.*'.
1497  }
1498  if (Name.consume_front("experimental.stepvector.")) {
1499    Intrinsic::ID ID = Intrinsic::stepvector;
1500    rename(F);
1502        F->getParent(), ID, F->getFunctionType()->getReturnType());
1503    return true;
1504  }
1505  break; // No other 'e*'.
1506  case 'f':
1507    if (Name.starts_with("flt.rounds")) {
1508      rename(F);
1509      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1510                                                Intrinsic::get_rounding);
1511      return true;
1512    }
1513    break;
1514  case 'i':
1515    if (Name.starts_with("invariant.group.barrier")) {
1516      // Rename invariant.group.barrier to launder.invariant.group
1517      auto Args = F->getFunctionType()->params();
1518      Type* ObjectPtr[1] = {Args[0]};
1519      rename(F);
1521          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1522      return true;
1523    }
1524    break;
1525  case 'l':
1526    if ((Name.starts_with("lifetime.start") ||
1527         Name.starts_with("lifetime.end")) &&
1528        F->arg_size() == 2) {
1529      Intrinsic::ID IID = Name.starts_with("lifetime.start")
1530                              ? Intrinsic::lifetime_start
1531                              : Intrinsic::lifetime_end;
1532      rename(F);
1533      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1534                                                F->getArg(0)->getType());
1535      return true;
1536    }
1537    break;
1538  case 'm': {
1539    // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1540    // alignment parameter to embedding the alignment as an attribute of
1541    // the pointer args.
1542    if (unsigned ID = StringSwitch<unsigned>(Name)
1543                          .StartsWith("memcpy.", Intrinsic::memcpy)
1544                          .StartsWith("memmove.", Intrinsic::memmove)
1545                          .Default(0)) {
1546      if (F->arg_size() == 5) {
1547        rename(F);
1548        // Get the types of dest, src, and len
1549        ArrayRef<Type *> ParamTypes =
1550            F->getFunctionType()->params().slice(0, 3);
1551        NewFn =
1552            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1553        return true;
1554      }
1555    }
1556    if (Name.starts_with("memset.") && F->arg_size() == 5) {
1557      rename(F);
1558      // Get the types of dest, and len
1559      const auto *FT = F->getFunctionType();
1560      Type *ParamTypes[2] = {
1561          FT->getParamType(0), // Dest
1562          FT->getParamType(2) // len
1563      };
1564      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1565                                                Intrinsic::memset, ParamTypes);
1566      return true;
1567    }
1568
1569    unsigned MaskedID =
1571            .StartsWith("masked.load", Intrinsic::masked_load)
1572            .StartsWith("masked.gather", Intrinsic::masked_gather)
1573            .StartsWith("masked.store", Intrinsic::masked_store)
1574            .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1575            .Default(0);
1576    if (MaskedID && F->arg_size() == 4) {
1577      rename(F);
      // Loads/gathers overload on {return, pointer}; stores/scatters on the
      // first two parameter types.
1578      if (MaskedID == Intrinsic::masked_load ||
1579          MaskedID == Intrinsic::masked_gather) {
1581            F->getParent(), MaskedID,
1582            {F->getReturnType(), F->getArg(0)->getType()});
1583        return true;
1584      }
1586          F->getParent(), MaskedID,
1587          {F->getArg(0)->getType(), F->getArg(1)->getType()});
1588      return true;
1589    }
1590    break;
1591  }
1592  case 'n': {
1593    if (Name.consume_front("nvvm.")) {
1594      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1595      if (F->arg_size() == 1) {
1596        Intrinsic::ID IID =
1598                .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1599                .Case("clz.i", Intrinsic::ctlz)
1600                .Case("popc.i", Intrinsic::ctpop)
1602        if (IID != Intrinsic::not_intrinsic) {
1603          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1604                                                    {F->getReturnType()});
1605          return true;
1606        }
1607      } else if (F->arg_size() == 2) {
1608        Intrinsic::ID IID =
1610                .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
1611                .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
1612                .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
1613                .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
1615        if (IID != Intrinsic::not_intrinsic) {
1616          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1617                                                    {F->getReturnType()});
1618          return true;
1619        }
1620      }
1621
1622      // Check for nvvm intrinsics that need a return type adjustment.
1623      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1625        if (IID != Intrinsic::not_intrinsic) {
1626          NewFn = nullptr;
1627          return true;
1628        }
1629      }
1630
1631      // Upgrade Distributed Shared Memory Intrinsics
1633      if (IID != Intrinsic::not_intrinsic) {
1634        rename(F);
1635        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1636        return true;
1637      }
1638
1639      // Upgrade TMA copy G2S Intrinsics
1641      if (IID != Intrinsic::not_intrinsic) {
1642        rename(F);
1643        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1644        return true;
1645      }
1646
1647      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1648      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1649      //
1650      // TODO: We could add lohi.i2d.
1651      bool Expand = false;
1652      if (Name.consume_front("abs."))
1653        // nvvm.abs.{i,ii}
1654        Expand =
1655            Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1656      else if (Name.consume_front("fabs."))
1657        // nvvm.fabs.{f,ftz.f,d}
1658        Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1659      else if (Name.consume_front("ex2.approx."))
1660        // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1661        Expand =
1662            Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1663      else if (Name.consume_front("atomic.load."))
1664        // nvvm.atomic.load.add.{f32,f64}.p
1665        // nvvm.atomic.load.{inc,dec}.32.p
1666        Expand = StringSwitch<bool>(Name)
1667                     .StartsWith("add.f32.p", true)
1668                     .StartsWith("add.f64.p", true)
1669                     .StartsWith("inc.32.p", true)
1670                     .StartsWith("dec.32.p", true)
1671                     .Default(false);
1672      else if (Name.consume_front("bitcast."))
1673        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1674        Expand =
1675            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1676      else if (Name.consume_front("rotate."))
1677        // nvvm.rotate.{b32,b64,right.b64}
1678        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1679      else if (Name.consume_front("ptr.gen.to."))
1680        // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1681        Expand = consumeNVVMPtrAddrSpace(Name);
1682      else if (Name.consume_front("ptr."))
1683        // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1684        Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1685      else if (Name.consume_front("ldg.global."))
1686        // nvvm.ldg.global.{i,p,f}
1687        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1688                  Name.starts_with("p."));
1689      else
1690        Expand = StringSwitch<bool>(Name)
1691                     .Case("barrier0", true)
1692                     .Case("barrier.n", true)
1693                     .Case("barrier.sync.cnt", true)
1694                     .Case("barrier.sync", true)
1695                     .Case("barrier", true)
1696                     .Case("bar.sync", true)
1697                     .Case("barrier0.popc", true)
1698                     .Case("barrier0.and", true)
1699                     .Case("barrier0.or", true)
1700                     .Case("clz.ll", true)
1701                     .Case("popc.ll", true)
1702                     .Case("h2f", true)
1703                     .Case("swap.lo.hi.b64", true)
1704                     .Case("tanh.approx.f32", true)
1705                     .Default(false);
1706
1707      if (Expand) {
1708        NewFn = nullptr;
1709        return true;
1710      }
1711      break; // No other 'nvvm.*'.
1712    }
1713    break;
1714  }
1715  case 'o':
1716    if (Name.starts_with("objectsize.")) {
1717      Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1718      if (F->arg_size() == 2 || F->arg_size() == 3) {
1719        rename(F);
1720        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1721                                                  Intrinsic::objectsize, Tys);
1722        return true;
1723      }
1724    }
1725    break;
1726
1727  case 'p':
1728    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1729      rename(F);
1731          F->getParent(), Intrinsic::ptr_annotation,
1732          {F->arg_begin()->getType(), F->getArg(1)->getType()});
1733      return true;
1734    }
1735    break;
1736
1737  case 'r': {
1738    if (Name.consume_front("riscv.")) {
1741            .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1742            .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1743            .Case("aes32esi", Intrinsic::riscv_aes32esi)
1744            .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
        // Only upgrade when the third parameter is not yet i32 (the old
        // declarations used a wider type).
1747        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1748          rename(F);
1749          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1750          return true;
1751        }
1752        break; // No other applicable upgrades.
1753      }
1754
1756               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1757               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1760        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1761            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1762          rename(F);
1763          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1764          return true;
1765        }
1766        break; // No other applicable upgrades.
1767      }
1768
1770               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1771               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1772               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1773               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1774               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1775               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1778        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1779          rename(F);
1780          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1781          return true;
1782        }
1783        break; // No other applicable upgrades.
1784      }
1785
1786      // Replace llvm.riscv.clmul with llvm.clmul.
1787      if (Name == "clmul.i32" || Name == "clmul.i64") {
1789            F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1790        return true;
1791      }
1792
1793      break; // No other 'riscv.*' intrinsics
1794    }
1795  } break;
1796
1797  case 's':
1798    if (Name == "stackprotectorcheck") {
1799      NewFn = nullptr;
1800      return true;
1801    }
1802    break;
1803
1804  case 't':
1805    if (Name == "thread.pointer") {
1807          F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1808      return true;
1809    }
1810    break;
1811
1812  case 'v': {
1813    if (Name == "var.annotation" && F->arg_size() == 4) {
1814      rename(F);
1816          F->getParent(), Intrinsic::var_annotation,
1817          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1818      return true;
1819    }
1820    if (Name.consume_front("vector.splice")) {
      // The '.left'/'.right' forms are current; anything else upgrades.
1821      if (Name.starts_with(".left") || Name.starts_with(".right"))
1822        break;
1823      return true;
1824    }
1825    break;
1826  }
1827
1828  case 'w':
1829    if (Name.consume_front("wasm.")) {
1832            .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1833            .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1834            .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1837        rename(F);
1838        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1839                                                  F->getReturnType());
1840        return true;
1841      }
1842
1843      if (Name.consume_front("dot.i8x16.i7x16.")) {
1845              .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1846              .Case("add.signed",
1847                    Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1850        rename(F);
1851        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1852        return true;
1853      }
1854      break; // No other 'wasm.dot.i8x16.i7x16.*'.
1855    }
1856    break; // No other 'wasm.*'.
1857  }
1858  break;
1859
1860  case 'x':
1861    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1862      return true;
1863  }
1864
1865  auto *ST = dyn_cast<StructType>(F->getReturnType());
1866  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1867      F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1868    // Replace return type with literal non-packed struct. Only do this for
1869    // intrinsics declared to return a struct, not for intrinsics with
1870    // overloaded return type, in which case the exact struct type will be
1871    // mangled into the name.
1872    if (Intrinsic::hasStructReturnType(F->getIntrinsicID())) {
1873      FunctionType *FT = F->getFunctionType();
1874      auto *NewST = StructType::get(ST->getContext(), ST->elements());
1875      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1876      std::string Name = F->getName().str();
1877      rename(F);
1878      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1879                               Name, F->getParent());
1880
1881      // The new function may also need remangling.
1882      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1883        NewFn = *Result;
1884      return true;
1885    }
1886  }
1887
1888  // Remangle our intrinsic since we upgrade the mangling
  // NOTE(review): the line computing 'Result' (presumably a call to
  // llvm::Intrinsic::remangleIntrinsicFunction(F) on original line 1889) is
  // missing from this extraction.
1890  if (Result != std::nullopt) {
1891    NewFn = *Result;
1892    return true;
1893  }
1894
1895  // This may not belong here. This function is effectively being overloaded
1896  // to both detect an intrinsic which needs upgrading, and to provide the
1897  // upgraded form of the intrinsic. We should perhaps have two separate
1898  // functions for this.
1899  return false;
1900}
1901
// Public entry point for intrinsic upgrades: detects whether F is a
// deprecated intrinsic and, if so, sets NewFn to the replacement declaration
// (or leaves it null when calls must be rewritten in place).  Returns true
// when an upgrade is required.  Also refreshes the intrinsic's attributes.
// NOTE(review): the first signature line was reconstructed from the upstream
// definition — confirm against llvm/IR/AutoUpgrade.h.
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
                                    bool CanUpgradeDebugIntrinsicsToRecords) {
  NewFn = nullptr;
  bool Upgraded =
      upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID()) {
    // Only do this if the intrinsic signature is valid.
    SmallVector<Type *> OverloadTys;
    if (Intrinsic::isSignatureValid(id, F->getFunctionType(), OverloadTys))
      F->setAttributes(
          Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
  }
  return Upgraded;
}
1920
// Upgrade llvm.global_ctors / llvm.global_dtors from the legacy two-field
// element type { i32 priority, ptr func } to the current three-field form
// { i32 priority, ptr func, ptr associated-data }, where the new field is
// filled with a null pointer.  Returns the replacement global (caller is
// responsible for splicing it in), or null when no upgrade applies.
// NOTE(review): the signature line and the two dyn_cast lines below were
// elided in this view and reconstructed — confirm against upstream.
GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Only the two well-known structor arrays are upgraded, and only when an
  // initializer is present to rewrite.
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  // Two elements means the old layout; three means already upgraded.
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  // New element type: old two fields plus a trailing pointer field.
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getPtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    // Copy priority and function; the associated-data field becomes null.
    NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
                                      Ctor->getAggregateElement(1),
                                      Constant::getNullValue(IRB.getPtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}
1951
1952// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1953// to byte shuffles.
1955 unsigned Shift) {
1956 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1957 unsigned NumElts = ResultTy->getNumElements() * 8;
1958
1959 // Bitcast from a 64-bit element type to a byte element type.
1960 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1961 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1962
1963 // We'll be shuffling in zeroes.
1964 Value *Res = Constant::getNullValue(VecTy);
1965
1966 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1967 // we'll just return the zero vector.
1968 if (Shift < 16) {
1969 int Idxs[64];
1970 // 256/512-bit version is split into 2/4 16-byte lanes.
1971 for (unsigned l = 0; l != NumElts; l += 16)
1972 for (unsigned i = 0; i != 16; ++i) {
1973 unsigned Idx = NumElts + i - Shift;
1974 if (Idx < NumElts)
1975 Idx -= NumElts - 16; // end of lane, switch operand.
1976 Idxs[l + i] = Idx + l;
1977 }
1978
1979 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1980 }
1981
1982 // Bitcast back to a 64-bit element type.
1983 return Builder.CreateBitCast(Res, ResultTy, "cast");
1984}
1985
1986// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1987// to byte shuffles.
1989 unsigned Shift) {
1990 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1991 unsigned NumElts = ResultTy->getNumElements() * 8;
1992
1993 // Bitcast from a 64-bit element type to a byte element type.
1994 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1995 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1996
1997 // We'll be shuffling in zeroes.
1998 Value *Res = Constant::getNullValue(VecTy);
1999
2000 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
2001 // we'll just return the zero vector.
2002 if (Shift < 16) {
2003 int Idxs[64];
2004 // 256/512-bit version is split into 2/4 16-byte lanes.
2005 for (unsigned l = 0; l != NumElts; l += 16)
2006 for (unsigned i = 0; i != 16; ++i) {
2007 unsigned Idx = i + Shift;
2008 if (Idx >= 16)
2009 Idx += NumElts - 16; // end of lane, switch operand.
2010 Idxs[l + i] = Idx + l;
2011 }
2012
2013 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
2014 }
2015
2016 // Bitcast back to a 64-bit element type.
2017 return Builder.CreateBitCast(Res, ResultTy, "cast");
2018}
2019
2020static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2021 unsigned NumElts) {
2022 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
2024 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
2025 Mask = Builder.CreateBitCast(Mask, MaskTy);
2026
2027 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2028 // i8 and we need to extract down to the right number of elements.
2029 if (NumElts <= 4) {
2030 int Indices[4];
2031 for (unsigned i = 0; i != NumElts; ++i)
2032 Indices[i] = i;
2033 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
2034 "extract");
2035 }
2036
2037 return Mask;
2038}
2039
2040static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2041 Value *Op1) {
2042 // If the mask is all ones just emit the first operation.
2043 if (const auto *C = dyn_cast<Constant>(Mask))
2044 if (C->isAllOnesValue())
2045 return Op0;
2046
2047 Mask = getX86MaskVec(Builder, Mask,
2048 cast<FixedVectorType>(Op0->getType())->getNumElements());
2049 return Builder.CreateSelect(Mask, Op0, Op1);
2050}
2051
2052static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2053 Value *Op1) {
2054 // If the mask is all ones just emit the first operation.
2055 if (const auto *C = dyn_cast<Constant>(Mask))
2056 if (C->isAllOnesValue())
2057 return Op0;
2058
2059 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2060 Mask->getType()->getIntegerBitWidth());
2061 Mask = Builder.CreateBitCast(Mask, MaskTy);
2062 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2063 return Builder.CreateSelect(Mask, Op0, Op1);
2064}
2065
2066// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2067// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2068// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
2070 Value *Op1, Value *Shift,
2071 Value *Passthru, Value *Mask,
2072 bool IsVALIGN) {
2073 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2074
2075 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2076 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2077 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2078 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2079
2080 // Mask the immediate for VALIGN.
2081 if (IsVALIGN)
2082 ShiftVal &= (NumElts - 1);
2083
2084 // If palignr is shifting the pair of vectors more than the size of two
2085 // lanes, emit zero.
2086 if (ShiftVal >= 32)
2088
2089 // If palignr is shifting the pair of input vectors more than one lane,
2090 // but less than two lanes, convert to shifting in zeroes.
2091 if (ShiftVal > 16) {
2092 ShiftVal -= 16;
2093 Op1 = Op0;
2095 }
2096
2097 int Indices[64];
2098 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2099 for (unsigned l = 0; l < NumElts; l += 16) {
2100 for (unsigned i = 0; i != 16; ++i) {
2101 unsigned Idx = ShiftVal + i;
2102 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2103 Idx += NumElts - 16; // End of lane, switch operand.
2104 Indices[l + i] = Idx + l;
2105 }
2106 }
2107
2108 Value *Align = Builder.CreateShuffleVector(
2109 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2110
2111 return emitX86Select(Builder, Mask, Align, Passthru);
2112}
2113
// Upgrade masked VPERMT2/VPERMI2 two-source variable permutes to the
// unmasked llvm.x86.avx512.vpermi2var.* intrinsic plus an explicit select
// under the mask (arg 3).  ZeroMask selects zero vs. pass-through blending;
// IndexForm distinguishes the vpermi2 (index-first) operand order.
// NOTE(review): the first signature line was elided in this view and
// reconstructed from upstream — confirm.
static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  // Pick the vpermi2var variant by vector width, element width and FP-ness.
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
                    CI.getArgOperand(2) };

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateIntrinsic(IID, Args);
  // Zero-masked variants blend with zero; otherwise blend with the original
  // pass-through operand (arg 1), bitcast to the result type.
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1),
                                                     Ty);
  return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
2173
2175 Intrinsic::ID IID) {
2176 Type *Ty = CI.getType();
2177 Value *Op0 = CI.getOperand(0);
2178 Value *Op1 = CI.getOperand(1);
2179 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2180
2181 if (CI.arg_size() == 4) { // For masked intrinsics.
2182 Value *VecSrc = CI.getOperand(2);
2183 Value *Mask = CI.getOperand(3);
2184 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2185 }
2186 return Res;
2187}
2188
2190 bool IsRotateRight) {
2191 Type *Ty = CI.getType();
2192 Value *Src = CI.getArgOperand(0);
2193 Value *Amt = CI.getArgOperand(1);
2194
2195 // Amount may be scalar immediate, in which case create a splat vector.
2196 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2197 // we only care about the lowest log2 bits anyway.
2198 if (Amt->getType() != Ty) {
2199 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2200 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2201 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2202 }
2203
2204 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2205 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2206
2207 if (CI.arg_size() == 4) { // For masked intrinsics.
2208 Value *VecSrc = CI.getOperand(2);
2209 Value *Mask = CI.getOperand(3);
2210 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2211 }
2212 return Res;
2213}
2214
2215static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2216 bool IsSigned) {
2217 Type *Ty = CI.getType();
2218 Value *LHS = CI.getArgOperand(0);
2219 Value *RHS = CI.getArgOperand(1);
2220
2221 CmpInst::Predicate Pred;
2222 switch (Imm) {
2223 case 0x0:
2224 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2225 break;
2226 case 0x1:
2227 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2228 break;
2229 case 0x2:
2230 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2231 break;
2232 case 0x3:
2233 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2234 break;
2235 case 0x4:
2236 Pred = ICmpInst::ICMP_EQ;
2237 break;
2238 case 0x5:
2239 Pred = ICmpInst::ICMP_NE;
2240 break;
2241 case 0x6:
2242 return Constant::getNullValue(Ty); // FALSE
2243 case 0x7:
2244 return Constant::getAllOnesValue(Ty); // TRUE
2245 default:
2246 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2247 }
2248
2249 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2250 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2251 return Ext;
2252}
2253
2255 bool IsShiftRight, bool ZeroMask) {
2256 Type *Ty = CI.getType();
2257 Value *Op0 = CI.getArgOperand(0);
2258 Value *Op1 = CI.getArgOperand(1);
2259 Value *Amt = CI.getArgOperand(2);
2260
2261 if (IsShiftRight)
2262 std::swap(Op0, Op1);
2263
2264 // Amount may be scalar immediate, in which case create a splat vector.
2265 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2266 // we only care about the lowest log2 bits anyway.
2267 if (Amt->getType() != Ty) {
2268 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2269 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2270 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2271 }
2272
2273 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2274 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2275
2276 unsigned NumArgs = CI.arg_size();
2277 if (NumArgs >= 4) { // For masked intrinsics.
2278 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2279 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2280 CI.getArgOperand(0);
2281 Value *Mask = CI.getOperand(NumArgs - 1);
2282 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2283 }
2284 return Res;
2285}
2286
2288 Value *Mask, bool Aligned) {
2289 const Align Alignment =
2290 Aligned
2291 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2292 : Align(1);
2293
2294 // If the mask is all ones just emit a regular store.
2295 if (const auto *C = dyn_cast<Constant>(Mask))
2296 if (C->isAllOnesValue())
2297 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2298
2299 // Convert the mask from an integer type to a vector of i1.
2300 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2301 Mask = getX86MaskVec(Builder, Mask, NumElts);
2302 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2303}
2304
2306 Value *Passthru, Value *Mask, bool Aligned) {
2307 Type *ValTy = Passthru->getType();
2308 const Align Alignment =
2309 Aligned
2310 ? Align(
2312 8)
2313 : Align(1);
2314
2315 // If the mask is all ones just emit a regular store.
2316 if (const auto *C = dyn_cast<Constant>(Mask))
2317 if (C->isAllOnesValue())
2318 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2319
2320 // Convert the mask from an integer type to a vector of i1.
2321 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2322 Mask = getX86MaskVec(Builder, Mask, NumElts);
2323 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2324}
2325
2326static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2327 Type *Ty = CI.getType();
2328 Value *Op0 = CI.getArgOperand(0);
2329 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2330 {Op0, Builder.getInt1(false)});
2331 if (CI.arg_size() == 3)
2332 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2333 return Res;
2334}
2335
2336static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2337 Type *Ty = CI.getType();
2338
2339 // Arguments have a vXi32 type so cast to vXi64.
2340 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2341 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2342
2343 if (IsSigned) {
2344 // Shift left then arithmetic shift right.
2345 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2346 LHS = Builder.CreateShl(LHS, ShiftAmt);
2347 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2348 RHS = Builder.CreateShl(RHS, ShiftAmt);
2349 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2350 } else {
2351 // Clear the upper bits.
2352 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2353 LHS = Builder.CreateAnd(LHS, Mask);
2354 RHS = Builder.CreateAnd(RHS, Mask);
2355 }
2356
2357 Value *Res = Builder.CreateMul(LHS, RHS);
2358
2359 if (CI.arg_size() == 4)
2360 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2361
2362 return Res;
2363}
2364
2365// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
2367 Value *Mask) {
2368 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2369 if (Mask) {
2370 const auto *C = dyn_cast<Constant>(Mask);
2371 if (!C || !C->isAllOnesValue())
2372 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2373 }
2374
2375 if (NumElts < 8) {
2376 int Indices[8];
2377 for (unsigned i = 0; i != NumElts; ++i)
2378 Indices[i] = i;
2379 for (unsigned i = NumElts; i != 8; ++i)
2380 Indices[i] = NumElts + i % NumElts;
2381 Vec = Builder.CreateShuffleVector(Vec,
2383 Indices);
2384 }
2385 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2386}
2387
2389 unsigned CC, bool Signed) {
2390 Value *Op0 = CI.getArgOperand(0);
2391 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2392
2393 Value *Cmp;
2394 if (CC == 3) {
2396 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2397 } else if (CC == 7) {
2399 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2400 } else {
2402 switch (CC) {
2403 default: llvm_unreachable("Unknown condition code");
2404 case 0: Pred = ICmpInst::ICMP_EQ; break;
2405 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2406 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2407 case 4: Pred = ICmpInst::ICMP_NE; break;
2408 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2409 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2410 }
2411 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2412 }
2413
2414 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2415
2416 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2417}
2418
2419// Replace a masked intrinsic with an older unmasked intrinsic.
2421 Intrinsic::ID IID) {
2422 Value *Rep =
2423 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2424 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2425}
2426
2428 Value* A = CI.getArgOperand(0);
2429 Value* B = CI.getArgOperand(1);
2430 Value* Src = CI.getArgOperand(2);
2431 Value* Mask = CI.getArgOperand(3);
2432
2433 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2434 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2435 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2436 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2437 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2438 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2439}
2440
2442 Value* Op = CI.getArgOperand(0);
2443 Type* ReturnOp = CI.getType();
2444 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2445 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2446 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2447}
2448
// Replace intrinsic with unmasked version and a select.
// Maps "avx512.mask.<op>.<size>" names (plus the call's vector/element
// widths) onto the pre-existing unmasked intrinsic, calls it with all but
// the trailing passthru+mask operands, and blends the result.  Returns
// false for names this table does not cover.
// NOTE(review): the first signature line was elided in this view and
// reconstructed from upstream — confirm.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallBase &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.starts_with("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.starts_with("permvar.")) {
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("conflict.")) {
    // Name[9] distinguishes the 'd' (i32) and 'q' (i64) element variants.
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pavg.")) {
    // Name[5] distinguishes the 'b' (i8) and 'w' (i16) element variants.
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  // Drop the trailing passthru + mask operands, call the unmasked intrinsic,
  // then blend with the passthru (arg NumArgs-2) under the mask (last arg).
  SmallVector<Value *, 4> Args(CI.args());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateIntrinsic(IID, Args);
  unsigned NumArgs = CI.arg_size();
  Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}
2686
/// Upgrade comment in call to inline asm that represents an objc retain
/// release marker.
namespace llvm {
void UpgradeInlineAsmString(std::string *AsmStr) {
  // Old-style marker asm begins with "mov\tfp", mentions
  // objc_retainAutoreleaseReturnValue, and tags the marker with a '#'
  // comment character; rewrite that '#' as ';'.
  if (AsmStr->find("mov\tfp") != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;
  size_t Pos = AsmStr->find("# marker");
  if (Pos != std::string::npos)
    AsmStr->replace(Pos, 1, ";");
}
} // namespace llvm
2697
// NOTE(review): the opening line of this definition (original line 2698, which
// carries the function's name and leading parameters) is missing from this
// excerpt, as are several interior lines (2729, 2735, 2737, 2823, 2833, 2838).
// From the visible body this is the llvm.nvvm.* intrinsic-upgrade helper:
// it maps each legacy NVVM intrinsic name onto current intrinsics or plain IR
// and returns the replacement Value, or nullptr when no replacement applies.
2699 Function *F, IRBuilder<> &Builder) {
2700 Value *Rep = nullptr;
2701
// Legacy integer abs (i32/i64): lower to neg + signed-compare + select.
2702 if (Name == "abs.i" || Name == "abs.ll") {
2703 Value *Arg = CI->getArgOperand(0);
2704 Value *Neg = Builder.CreateNeg(Arg, "neg");
2705 Value *Cmp = Builder.CreateICmpSGE(
2706 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2707 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
// bf16 abs: bitcast the operand to bf16 (or <2 x bf16>), apply nvvm_fabs,
// then bitcast back to the call's original result type.
2708 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2709 Type *Ty = (Name == "abs.bf16")
2710 ? Builder.getBFloatTy()
2711 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2712 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2713 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2714 Rep = Builder.CreateBitCast(Abs, CI->getType());
// Scalar fabs variants collapse onto nvvm_fabs / nvvm_fabs_ftz.
2715 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2716 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2717 : Intrinsic::nvvm_fabs;
2718 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2719 } else if (Name.consume_front("ex2.approx.")) {
2720 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2721 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2722 : Intrinsic::nvvm_ex2_approx;
2723 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
// Legacy float atomic add becomes a plain atomicrmw fadd.
// (The AtomicOrdering argument line, original 2729, is absent from this
// excerpt.)
2724 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2725 Name.starts_with("atomic.load.add.f64.p")) {
2726 Value *Ptr = CI->getArgOperand(0);
2727 Value *Val = CI->getArgOperand(1);
2728 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
// atomic inc/dec become atomicrmw uinc_wrap / (presumably udec_wrap — the
// alternative branch line, original 2735, is absent; TODO confirm).
2730 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2731 Name.starts_with("atomic.load.dec.32.p")) {
2732 Value *Ptr = CI->getArgOperand(0);
2733 Value *Val = CI->getArgOperand(1);
2734 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2736 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2738 } else if (Name == "clz.ll") {
2739 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2740 Value *Arg = CI->getArgOperand(0);
2741 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2742 {Arg, Builder.getFalse()},
2743 /*FMFSource=*/nullptr, "ctlz");
2744 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2745 } else if (Name == "popc.ll") {
2746 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2747 // i64.
2748 Value *Arg = CI->getArgOperand(0);
2749 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2750 Arg, /*FMFSource=*/nullptr, "ctpop");
2751 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
// h2f: reinterpret the operand as half, then extend to float.
2752 } else if (Name == "h2f") {
2753 Value *Cast =
2754 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
2755 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
// The legacy bitcast.{f2i,i2f,ll2d,d2ll} helpers are literal bitcasts.
2756 } else if (Name.consume_front("bitcast.") &&
2757 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2758 Name == "d2ll")) {
2759 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
// Rotates map onto funnel shifts with both value operands equal:
// fshl(x, x, n) is rotate-left, fshr(x, x, n) is rotate-right.
2760 } else if (Name == "rotate.b32") {
2761 Value *Arg = CI->getOperand(0);
2762 Value *ShiftAmt = CI->getOperand(1);
2763 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2764 {Arg, Arg, ShiftAmt});
2765 } else if (Name == "rotate.b64") {
2766 Type *Int64Ty = Builder.getInt64Ty();
2767 Value *Arg = CI->getOperand(0);
// The legacy 64-bit rotate takes a narrower shift amount; widen it to i64.
2768 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2769 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2770 {Arg, Arg, ZExtShiftAmt});
2771 } else if (Name == "rotate.right.b64") {
2772 Type *Int64Ty = Builder.getInt64Ty();
2773 Value *Arg = CI->getOperand(0);
2774 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2775 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2776 {Arg, Arg, ZExtShiftAmt});
// swap.lo.hi.b64 is a rotate by exactly 32 bits.
2777 } else if (Name == "swap.lo.hi.b64") {
2778 Type *Int64Ty = Builder.getInt64Ty();
2779 Value *Arg = CI->getOperand(0);
2780 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2781 {Arg, Arg, Builder.getInt64(32)});
// ptr.gen.to.<space> and ptr.<space>.to.gen both lower to addrspacecast.
2782 } else if ((Name.consume_front("ptr.gen.to.") &&
2783 consumeNVVMPtrAddrSpace(Name)) ||
2784 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2785 Name.starts_with(".to.gen"))) {
2786 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
// ldg becomes an aligned load from addrspace(1), tagged !invariant.load;
// arg 1 carries the alignment as a constant. Note the early return: this
// branch hands back the load directly instead of going through Rep.
2787 } else if (Name.consume_front("ldg.global")) {
2788 Value *Ptr = CI->getArgOperand(0);
2789 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2790 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2791 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2792 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2793 MDNode *MD = MDNode::get(Builder.getContext(), {});
2794 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2795 return LD;
2796 } else if (Name == "tanh.approx.f32") {
2797 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2798 FastMathFlags FMF;
2799 FMF.setApproxFunc();
2800 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2801 FMF);
// Barrier family: barrier0 has no operand (use 0); barrier.n / bar.sync
// forward their explicit barrier-id operand.
2802 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2803 Value *Arg =
2804 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2805 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2806 {}, {Arg})
2807 } else if (Name == "barrier") {
2808 Rep = Builder.CreateIntrinsic(
2809 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2810 {CI->getArgOperand(0), CI->getArgOperand(1)});
2811 } else if (Name == "barrier.sync") {
2812 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2813 {CI->getArgOperand(0)});
2814 } else if (Name == "barrier.sync.cnt") {
2815 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2816 {CI->getArgOperand(0), CI->getArgOperand(1)});
// barrier0 reductions: compare the operand against zero to form an i1
// predicate, run the aligned CTA reduction, then zext back to the call's
// integer result type. (The StringSwitch header line, original 2823, is
// absent from this excerpt.)
2817 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2818 Name == "barrier0.or") {
2819 Value *C = CI->getArgOperand(0);
2820 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2821
2822 Intrinsic::ID IID =
2824 .Case("barrier0.popc",
2825 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2826 .Case("barrier0.and",
2827 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2828 .Case("barrier0.or",
2829 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2830 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2831 Rep = Builder.CreateZExt(Bar, CI->getType());
// Fallback: look up a one-to-one replacement intrinsic ID (the lookup line,
// original 2833, and the Args declaration, original 2838, are absent from
// this excerpt). Integer arguments are bitcast to bf16 where the new
// signature requires it, and an integer-typed caller result is bitcast back.
2832 } else {
2834 if (IID != Intrinsic::not_intrinsic &&
2835 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2836 rename(F);
2837 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2839 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2840 Value *Arg = CI->getArgOperand(I);
2841 Type *OldType = Arg->getType();
2842 Type *NewType = NewFn->getArg(I)->getType();
2843 Args.push_back(
2844 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2845 ? Builder.CreateBitCast(Arg, NewType)
2846 : Arg);
2847 }
2848 Rep = Builder.CreateCall(NewFn, Args);
2849 if (F->getReturnType()->isIntegerTy())
2850 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2851 }
2852 }
2853
// nullptr means "no replacement produced"; callers keep the original call.
2854 return Rep;
2855}
2856
2858 IRBuilder<> &Builder) {
2859 LLVMContext &C = F->getContext();
2860 Value *Rep = nullptr;
2861
2862 if (Name.starts_with("sse4a.movnt.")) {
2864 Elts.push_back(
2865 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2866 MDNode *Node = MDNode::get(C, Elts);
2867
2868 Value *Arg0 = CI->getArgOperand(0);
2869 Value *Arg1 = CI->getArgOperand(1);
2870
2871 // Nontemporal (unaligned) store of the 0'th element of the float/double
2872 // vector.
2873 Value *Extract =
2874 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2875
2876 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2877 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2878 } else if (Name.starts_with("avx.movnt.") ||
2879 Name.starts_with("avx512.storent.")) {
2881 Elts.push_back(
2882 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2883 MDNode *Node = MDNode::get(C, Elts);
2884
2885 Value *Arg0 = CI->getArgOperand(0);
2886 Value *Arg1 = CI->getArgOperand(1);
2887
2888 StoreInst *SI = Builder.CreateAlignedStore(
2889 Arg1, Arg0,
2891 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2892 } else if (Name == "sse2.storel.dq") {
2893 Value *Arg0 = CI->getArgOperand(0);
2894 Value *Arg1 = CI->getArgOperand(1);
2895
2896 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2897 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2898 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2899 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2900 } else if (Name.starts_with("sse.storeu.") ||
2901 Name.starts_with("sse2.storeu.") ||
2902 Name.starts_with("avx.storeu.")) {
2903 Value *Arg0 = CI->getArgOperand(0);
2904 Value *Arg1 = CI->getArgOperand(1);
2905 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2906 } else if (Name == "avx512.mask.store.ss") {
2907 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2908 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2909 Mask, false);
2910 } else if (Name.starts_with("avx512.mask.store")) {
2911 // "avx512.mask.storeu." or "avx512.mask.store."
2912 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2913 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2914 CI->getArgOperand(2), Aligned);
2915 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2916 // Upgrade packed integer vector compare intrinsics to compare instructions.
2917 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2918 bool CmpEq = Name[9] == 'e';
2919 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2920 CI->getArgOperand(0), CI->getArgOperand(1));
2921 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2922 } else if (Name.starts_with("avx512.broadcastm")) {
2923 Type *ExtTy = Type::getInt32Ty(C);
2924 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2925 ExtTy = Type::getInt64Ty(C);
2926 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2927 ExtTy->getPrimitiveSizeInBits();
2928 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2929 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2930 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2931 Value *Vec = CI->getArgOperand(0);
2932 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2933 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2934 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2935 } else if (Name.starts_with("avx.sqrt.p") ||
2936 Name.starts_with("sse2.sqrt.p") ||
2937 Name.starts_with("sse.sqrt.p")) {
2938 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2939 {CI->getArgOperand(0)});
2940 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2941 if (CI->arg_size() == 4 &&
2942 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2943 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2944 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2945 : Intrinsic::x86_avx512_sqrt_pd_512;
2946
2947 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2948 Rep = Builder.CreateIntrinsic(IID, Args);
2949 } else {
2950 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2951 {CI->getArgOperand(0)});
2952 }
2953 Rep =
2954 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2955 } else if (Name.starts_with("avx512.ptestm") ||
2956 Name.starts_with("avx512.ptestnm")) {
2957 Value *Op0 = CI->getArgOperand(0);
2958 Value *Op1 = CI->getArgOperand(1);
2959 Value *Mask = CI->getArgOperand(2);
2960 Rep = Builder.CreateAnd(Op0, Op1);
2961 llvm::Type *Ty = Op0->getType();
2963 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2966 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2967 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2968 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2969 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2970 ->getNumElements();
2971 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2972 Rep =
2973 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2974 } else if (Name.starts_with("avx512.kunpck")) {
2975 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2976 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2977 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2978 int Indices[64];
2979 for (unsigned i = 0; i != NumElts; ++i)
2980 Indices[i] = i;
2981
2982 // First extract half of each vector. This gives better codegen than
2983 // doing it in a single shuffle.
2984 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2985 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2986 // Concat the vectors.
2987 // NOTE: Operands have to be swapped to match intrinsic definition.
2988 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2989 Rep = Builder.CreateBitCast(Rep, CI->getType());
2990 } else if (Name == "avx512.kand.w") {
2991 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2992 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2993 Rep = Builder.CreateAnd(LHS, RHS);
2994 Rep = Builder.CreateBitCast(Rep, CI->getType());
2995 } else if (Name == "avx512.kandn.w") {
2996 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2997 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2998 LHS = Builder.CreateNot(LHS);
2999 Rep = Builder.CreateAnd(LHS, RHS);
3000 Rep = Builder.CreateBitCast(Rep, CI->getType());
3001 } else if (Name == "avx512.kor.w") {
3002 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3003 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3004 Rep = Builder.CreateOr(LHS, RHS);
3005 Rep = Builder.CreateBitCast(Rep, CI->getType());
3006 } else if (Name == "avx512.kxor.w") {
3007 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3008 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3009 Rep = Builder.CreateXor(LHS, RHS);
3010 Rep = Builder.CreateBitCast(Rep, CI->getType());
3011 } else if (Name == "avx512.kxnor.w") {
3012 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3013 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3014 LHS = Builder.CreateNot(LHS);
3015 Rep = Builder.CreateXor(LHS, RHS);
3016 Rep = Builder.CreateBitCast(Rep, CI->getType());
3017 } else if (Name == "avx512.knot.w") {
3018 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3019 Rep = Builder.CreateNot(Rep);
3020 Rep = Builder.CreateBitCast(Rep, CI->getType());
3021 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3022 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3023 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3024 Rep = Builder.CreateOr(LHS, RHS);
3025 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3026 Value *C;
3027 if (Name[14] == 'c')
3028 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
3029 else
3030 C = ConstantInt::getNullValue(Builder.getInt16Ty());
3031 Rep = Builder.CreateICmpEQ(Rep, C);
3032 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3033 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3034 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3035 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3036 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3037 Type *I32Ty = Type::getInt32Ty(C);
3038 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
3039 ConstantInt::get(I32Ty, 0));
3040 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
3041 ConstantInt::get(I32Ty, 0));
3042 Value *EltOp;
3043 if (Name.contains(".add."))
3044 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3045 else if (Name.contains(".sub."))
3046 EltOp = Builder.CreateFSub(Elt0, Elt1);
3047 else if (Name.contains(".mul."))
3048 EltOp = Builder.CreateFMul(Elt0, Elt1);
3049 else
3050 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3051 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3052 ConstantInt::get(I32Ty, 0));
3053 } else if (Name.starts_with("avx512.mask.pcmp")) {
3054 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3055 bool CmpEq = Name[16] == 'e';
3056 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3057 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3058 Type *OpTy = CI->getArgOperand(0)->getType();
3059 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3060 Intrinsic::ID IID;
3061 switch (VecWidth) {
3062 default:
3063 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3064 break;
3065 case 128:
3066 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3067 break;
3068 case 256:
3069 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3070 break;
3071 case 512:
3072 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3073 break;
3074 }
3075
3076 Rep =
3077 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3078 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3079 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3080 Type *OpTy = CI->getArgOperand(0)->getType();
3081 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3082 unsigned EltWidth = OpTy->getScalarSizeInBits();
3083 Intrinsic::ID IID;
3084 if (VecWidth == 128 && EltWidth == 32)
3085 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3086 else if (VecWidth == 256 && EltWidth == 32)
3087 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3088 else if (VecWidth == 512 && EltWidth == 32)
3089 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3090 else if (VecWidth == 128 && EltWidth == 64)
3091 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3092 else if (VecWidth == 256 && EltWidth == 64)
3093 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3094 else if (VecWidth == 512 && EltWidth == 64)
3095 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3096 else
3097 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3098
3099 Rep =
3100 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3101 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3102 } else if (Name.starts_with("avx512.cmp.p")) {
3103 SmallVector<Value *, 4> Args(CI->args());
3104 Type *OpTy = Args[0]->getType();
3105 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3106 unsigned EltWidth = OpTy->getScalarSizeInBits();
3107 Intrinsic::ID IID;
3108 if (VecWidth == 128 && EltWidth == 32)
3109 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3110 else if (VecWidth == 256 && EltWidth == 32)
3111 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3112 else if (VecWidth == 512 && EltWidth == 32)
3113 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3114 else if (VecWidth == 128 && EltWidth == 64)
3115 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3116 else if (VecWidth == 256 && EltWidth == 64)
3117 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3118 else if (VecWidth == 512 && EltWidth == 64)
3119 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3120 else
3121 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3122
3124 if (VecWidth == 512)
3125 std::swap(Mask, Args.back());
3126 Args.push_back(Mask);
3127
3128 Rep = Builder.CreateIntrinsic(IID, Args);
3129 } else if (Name.starts_with("avx512.mask.cmp.")) {
3130 // Integer compare intrinsics.
3131 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3132 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3133 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3134 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3135 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3136 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3137 Name.starts_with("avx512.cvtw2mask.") ||
3138 Name.starts_with("avx512.cvtd2mask.") ||
3139 Name.starts_with("avx512.cvtq2mask.")) {
3140 Value *Op = CI->getArgOperand(0);
3141 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3142 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3143 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3144 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3145 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3146 Name.starts_with("avx512.mask.pabs")) {
3147 Rep = upgradeAbs(Builder, *CI);
3148 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3149 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3150 Name.starts_with("avx512.mask.pmaxs")) {
3151 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3152 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3153 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3154 Name.starts_with("avx512.mask.pmaxu")) {
3155 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3156 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3157 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3158 Name.starts_with("avx512.mask.pmins")) {
3159 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3160 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3161 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3162 Name.starts_with("avx512.mask.pminu")) {
3163 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3164 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3165 Name == "avx512.pmulu.dq.512" ||
3166 Name.starts_with("avx512.mask.pmulu.dq.")) {
3167 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3168 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3169 Name == "avx512.pmul.dq.512" ||
3170 Name.starts_with("avx512.mask.pmul.dq.")) {
3171 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3172 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3173 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3174 Rep =
3175 Builder.CreateSIToFP(CI->getArgOperand(1),
3176 cast<VectorType>(CI->getType())->getElementType());
3177 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3178 } else if (Name == "avx512.cvtusi2sd") {
3179 Rep =
3180 Builder.CreateUIToFP(CI->getArgOperand(1),
3181 cast<VectorType>(CI->getType())->getElementType());
3182 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3183 } else if (Name == "sse2.cvtss2sd") {
3184 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3185 Rep = Builder.CreateFPExt(
3186 Rep, cast<VectorType>(CI->getType())->getElementType());
3187 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3188 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3189 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3190 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3191 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3192 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3193 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3194 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3195 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3196 Name == "avx512.mask.cvtqq2ps.256" ||
3197 Name == "avx512.mask.cvtqq2ps.512" ||
3198 Name == "avx512.mask.cvtuqq2ps.256" ||
3199 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3200 Name == "avx.cvt.ps2.pd.256" ||
3201 Name == "avx512.mask.cvtps2pd.128" ||
3202 Name == "avx512.mask.cvtps2pd.256") {
3203 auto *DstTy = cast<FixedVectorType>(CI->getType());
3204 Rep = CI->getArgOperand(0);
3205 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3206
3207 unsigned NumDstElts = DstTy->getNumElements();
3208 if (NumDstElts < SrcTy->getNumElements()) {
3209 assert(NumDstElts == 2 && "Unexpected vector size");
3210 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3211 }
3212
3213 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3214 bool IsUnsigned = Name.contains("cvtu");
3215 if (IsPS2PD)
3216 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3217 else if (CI->arg_size() == 4 &&
3218 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3219 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3220 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3221 : Intrinsic::x86_avx512_sitofp_round;
3222 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3223 {Rep, CI->getArgOperand(3)});
3224 } else {
3225 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3226 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3227 }
3228
3229 if (CI->arg_size() >= 3)
3230 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3231 CI->getArgOperand(1));
3232 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3233 Name.starts_with("vcvtph2ps.")) {
3234 auto *DstTy = cast<FixedVectorType>(CI->getType());
3235 Rep = CI->getArgOperand(0);
3236 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3237 unsigned NumDstElts = DstTy->getNumElements();
3238 if (NumDstElts != SrcTy->getNumElements()) {
3239 assert(NumDstElts == 4 && "Unexpected vector size");
3240 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3241 }
3242 Rep = Builder.CreateBitCast(
3243 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3244 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3245 if (CI->arg_size() >= 3)
3246 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3247 CI->getArgOperand(1));
3248 } else if (Name.starts_with("avx512.mask.load")) {
3249 // "avx512.mask.loadu." or "avx512.mask.load."
3250 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3251 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3252 CI->getArgOperand(2), Aligned);
3253 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3254 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3255 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3256 ResultTy->getNumElements());
3257
3258 Rep = Builder.CreateIntrinsic(
3259 Intrinsic::masked_expandload, ResultTy,
3260 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3261 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3262 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3263 Value *MaskVec =
3264 getX86MaskVec(Builder, CI->getArgOperand(2),
3265 cast<FixedVectorType>(ResultTy)->getNumElements());
3266
3267 Rep = Builder.CreateIntrinsic(
3268 Intrinsic::masked_compressstore, ResultTy,
3269 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3270 } else if (Name.starts_with("avx512.mask.compress.") ||
3271 Name.starts_with("avx512.mask.expand.")) {
3272 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3273
3274 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3275 ResultTy->getNumElements());
3276
3277 bool IsCompress = Name[12] == 'c';
3278 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3279 : Intrinsic::x86_avx512_mask_expand;
3280 Rep = Builder.CreateIntrinsic(
3281 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3282 } else if (Name.starts_with("xop.vpcom")) {
3283 bool IsSigned;
3284 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3285 Name.ends_with("uq"))
3286 IsSigned = false;
3287 else if (Name.ends_with("b") || Name.ends_with("w") ||
3288 Name.ends_with("d") || Name.ends_with("q"))
3289 IsSigned = true;
3290 else
3291 reportFatalUsageErrorWithCI("Intrinsic has unknown suffix", CI);
3292
3293 unsigned Imm;
3294 if (CI->arg_size() == 3) {
3295 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3296 } else {
3297 Name = Name.substr(9); // strip off "xop.vpcom"
3298 if (Name.starts_with("lt"))
3299 Imm = 0;
3300 else if (Name.starts_with("le"))
3301 Imm = 1;
3302 else if (Name.starts_with("gt"))
3303 Imm = 2;
3304 else if (Name.starts_with("ge"))
3305 Imm = 3;
3306 else if (Name.starts_with("eq"))
3307 Imm = 4;
3308 else if (Name.starts_with("ne"))
3309 Imm = 5;
3310 else if (Name.starts_with("false"))
3311 Imm = 6;
3312 else if (Name.starts_with("true"))
3313 Imm = 7;
3314 else
3315 llvm_unreachable("Unknown condition");
3316 }
3317
3318 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3319 } else if (Name.starts_with("xop.vpcmov")) {
3320 Value *Sel = CI->getArgOperand(2);
3321 Value *NotSel = Builder.CreateNot(Sel);
3322 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3323 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3324 Rep = Builder.CreateOr(Sel0, Sel1);
3325 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3326 Name.starts_with("avx512.mask.prol")) {
3327 Rep = upgradeX86Rotate(Builder, *CI, false);
3328 } else if (Name.starts_with("avx512.pror") ||
3329 Name.starts_with("avx512.mask.pror")) {
3330 Rep = upgradeX86Rotate(Builder, *CI, true);
3331 } else if (Name.starts_with("avx512.vpshld.") ||
3332 Name.starts_with("avx512.mask.vpshld") ||
3333 Name.starts_with("avx512.maskz.vpshld")) {
3334 bool ZeroMask = Name[11] == 'z';
3335 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3336 } else if (Name.starts_with("avx512.vpshrd.") ||
3337 Name.starts_with("avx512.mask.vpshrd") ||
3338 Name.starts_with("avx512.maskz.vpshrd")) {
3339 bool ZeroMask = Name[11] == 'z';
3340 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3341 } else if (Name == "sse42.crc32.64.8") {
3342 Value *Trunc0 =
3343 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3344 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3345 {Trunc0, CI->getArgOperand(1)});
3346 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3347 } else if (Name.starts_with("avx.vbroadcast.s") ||
3348 Name.starts_with("avx512.vbroadcast.s")) {
3349 // Replace broadcasts with a series of insertelements.
3350 auto *VecTy = cast<FixedVectorType>(CI->getType());
3351 Type *EltTy = VecTy->getElementType();
3352 unsigned EltNum = VecTy->getNumElements();
3353 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3354 Type *I32Ty = Type::getInt32Ty(C);
3355 Rep = PoisonValue::get(VecTy);
3356 for (unsigned I = 0; I < EltNum; ++I)
3357 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3358 } else if (Name.starts_with("sse41.pmovsx") ||
3359 Name.starts_with("sse41.pmovzx") ||
3360 Name.starts_with("avx2.pmovsx") ||
3361 Name.starts_with("avx2.pmovzx") ||
3362 Name.starts_with("avx512.mask.pmovsx") ||
3363 Name.starts_with("avx512.mask.pmovzx")) {
3364 auto *DstTy = cast<FixedVectorType>(CI->getType());
3365 unsigned NumDstElts = DstTy->getNumElements();
3366
3367 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3368 SmallVector<int, 8> ShuffleMask(NumDstElts);
3369 for (unsigned i = 0; i != NumDstElts; ++i)
3370 ShuffleMask[i] = i;
3371
3372 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3373
3374 bool DoSext = Name.contains("pmovsx");
3375 Rep =
3376 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3377 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3378 if (CI->arg_size() == 3)
3379 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3380 CI->getArgOperand(1));
3381 } else if (Name == "avx512.mask.pmov.qd.256" ||
3382 Name == "avx512.mask.pmov.qd.512" ||
3383 Name == "avx512.mask.pmov.wb.256" ||
3384 Name == "avx512.mask.pmov.wb.512") {
3385 Type *Ty = CI->getArgOperand(1)->getType();
3386 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3387 Rep =
3388 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3389 } else if (Name.starts_with("avx.vbroadcastf128") ||
3390 Name == "avx2.vbroadcasti128") {
3391 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3392 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3393 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3394 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3395 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3396 if (NumSrcElts == 2)
3397 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3398 else
3399 Rep = Builder.CreateShuffleVector(Load,
3400 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3401 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3402 Name.starts_with("avx512.mask.shuf.f")) {
3403 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3404 Type *VT = CI->getType();
3405 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3406 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3407 unsigned ControlBitsMask = NumLanes - 1;
3408 unsigned NumControlBits = NumLanes / 2;
3409 SmallVector<int, 8> ShuffleMask(0);
3410
3411 for (unsigned l = 0; l != NumLanes; ++l) {
3412 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3413 // We actually need the other source.
3414 if (l >= NumLanes / 2)
3415 LaneMask += NumLanes;
3416 for (unsigned i = 0; i != NumElementsInLane; ++i)
3417 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3418 }
3419 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3420 CI->getArgOperand(1), ShuffleMask);
3421 Rep =
3422 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3423 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3424 Name.starts_with("avx512.mask.broadcasti")) {
3425 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3426 ->getNumElements();
3427 unsigned NumDstElts =
3428 cast<FixedVectorType>(CI->getType())->getNumElements();
3429
3430 SmallVector<int, 8> ShuffleMask(NumDstElts);
3431 for (unsigned i = 0; i != NumDstElts; ++i)
3432 ShuffleMask[i] = i % NumSrcElts;
3433
3434 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3435 CI->getArgOperand(0), ShuffleMask);
3436 Rep =
3437 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3438 } else if (Name.starts_with("avx2.pbroadcast") ||
3439 Name.starts_with("avx2.vbroadcast") ||
3440 Name.starts_with("avx512.pbroadcast") ||
3441 Name.starts_with("avx512.mask.broadcast.s")) {
3442 // Replace vp?broadcasts with a vector shuffle.
3443 Value *Op = CI->getArgOperand(0);
3444 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3445 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3448 Rep = Builder.CreateShuffleVector(Op, M);
3449
3450 if (CI->arg_size() == 3)
3451 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3452 CI->getArgOperand(1));
3453 } else if (Name.starts_with("sse2.padds.") ||
3454 Name.starts_with("avx2.padds.") ||
3455 Name.starts_with("avx512.padds.") ||
3456 Name.starts_with("avx512.mask.padds.")) {
3457 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3458 } else if (Name.starts_with("sse2.psubs.") ||
3459 Name.starts_with("avx2.psubs.") ||
3460 Name.starts_with("avx512.psubs.") ||
3461 Name.starts_with("avx512.mask.psubs.")) {
3462 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3463 } else if (Name.starts_with("sse2.paddus.") ||
3464 Name.starts_with("avx2.paddus.") ||
3465 Name.starts_with("avx512.mask.paddus.")) {
3466 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3467 } else if (Name.starts_with("sse2.psubus.") ||
3468 Name.starts_with("avx2.psubus.") ||
3469 Name.starts_with("avx512.mask.psubus.")) {
3470 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3471 } else if (Name.starts_with("avx512.mask.palignr.")) {
3472 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3473 CI->getArgOperand(1), CI->getArgOperand(2),
3474 CI->getArgOperand(3), CI->getArgOperand(4),
3475 false);
3476 } else if (Name.starts_with("avx512.mask.valign.")) {
3478 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3479 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3480 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3481 // 128/256-bit shift left specified in bits.
3482 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3483 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3484 Shift / 8); // Shift is in bits.
3485 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3486 // 128/256-bit shift right specified in bits.
3487 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3488 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3489 Shift / 8); // Shift is in bits.
3490 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3491 Name == "avx512.psll.dq.512") {
3492 // 128/256/512-bit shift left specified in bytes.
3493 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3494 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3495 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3496 Name == "avx512.psrl.dq.512") {
3497 // 128/256/512-bit shift right specified in bytes.
3498 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3499 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3500 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3501 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3502 Name.starts_with("avx2.pblendd.")) {
3503 Value *Op0 = CI->getArgOperand(0);
3504 Value *Op1 = CI->getArgOperand(1);
3505 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3506 auto *VecTy = cast<FixedVectorType>(CI->getType());
3507 unsigned NumElts = VecTy->getNumElements();
3508
3509 SmallVector<int, 16> Idxs(NumElts);
3510 for (unsigned i = 0; i != NumElts; ++i)
3511 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3512
3513 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3514 } else if (Name.starts_with("avx.vinsertf128.") ||
3515 Name == "avx2.vinserti128" ||
3516 Name.starts_with("avx512.mask.insert")) {
3517 Value *Op0 = CI->getArgOperand(0);
3518 Value *Op1 = CI->getArgOperand(1);
3519 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3520 unsigned DstNumElts =
3521 cast<FixedVectorType>(CI->getType())->getNumElements();
3522 unsigned SrcNumElts =
3523 cast<FixedVectorType>(Op1->getType())->getNumElements();
3524 unsigned Scale = DstNumElts / SrcNumElts;
3525
3526 // Mask off the high bits of the immediate value; hardware ignores those.
3527 Imm = Imm % Scale;
3528
3529 // Extend the second operand into a vector the size of the destination.
3530 SmallVector<int, 8> Idxs(DstNumElts);
3531 for (unsigned i = 0; i != SrcNumElts; ++i)
3532 Idxs[i] = i;
3533 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3534 Idxs[i] = SrcNumElts;
3535 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3536
3537 // Insert the second operand into the first operand.
3538
3539 // Note that there is no guarantee that instruction lowering will actually
3540 // produce a vinsertf128 instruction for the created shuffles. In
3541 // particular, the 0 immediate case involves no lane changes, so it can
3542 // be handled as a blend.
3543
3544 // Example of shuffle mask for 32-bit elements:
3545 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3546 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3547
3548 // First fill with identity mask.
3549 for (unsigned i = 0; i != DstNumElts; ++i)
3550 Idxs[i] = i;
3551 // Then replace the elements where we need to insert.
3552 for (unsigned i = 0; i != SrcNumElts; ++i)
3553 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3554 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3555
3556 // If the intrinsic has a mask operand, handle that.
3557 if (CI->arg_size() == 5)
3558 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3559 CI->getArgOperand(3));
3560 } else if (Name.starts_with("avx.vextractf128.") ||
3561 Name == "avx2.vextracti128" ||
3562 Name.starts_with("avx512.mask.vextract")) {
3563 Value *Op0 = CI->getArgOperand(0);
3564 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3565 unsigned DstNumElts =
3566 cast<FixedVectorType>(CI->getType())->getNumElements();
3567 unsigned SrcNumElts =
3568 cast<FixedVectorType>(Op0->getType())->getNumElements();
3569 unsigned Scale = SrcNumElts / DstNumElts;
3570
3571 // Mask off the high bits of the immediate value; hardware ignores those.
3572 Imm = Imm % Scale;
3573
3574 // Get indexes for the subvector of the input vector.
3575 SmallVector<int, 8> Idxs(DstNumElts);
3576 for (unsigned i = 0; i != DstNumElts; ++i) {
3577 Idxs[i] = i + (Imm * DstNumElts);
3578 }
3579 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3580
3581 // If the intrinsic has a mask operand, handle that.
3582 if (CI->arg_size() == 4)
3583 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3584 CI->getArgOperand(2));
3585 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3586 Name.starts_with("avx512.mask.perm.di.")) {
3587 Value *Op0 = CI->getArgOperand(0);
3588 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3589 auto *VecTy = cast<FixedVectorType>(CI->getType());
3590 unsigned NumElts = VecTy->getNumElements();
3591
3592 SmallVector<int, 8> Idxs(NumElts);
3593 for (unsigned i = 0; i != NumElts; ++i)
3594 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3595
3596 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3597
3598 if (CI->arg_size() == 4)
3599 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3600 CI->getArgOperand(2));
3601 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3602 // The immediate permute control byte looks like this:
3603 // [1:0] - select 128 bits from sources for low half of destination
3604 // [2] - ignore
3605 // [3] - zero low half of destination
3606 // [5:4] - select 128 bits from sources for high half of destination
3607 // [6] - ignore
3608 // [7] - zero high half of destination
3609
3610 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3611
3612 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3613 unsigned HalfSize = NumElts / 2;
3614 SmallVector<int, 8> ShuffleMask(NumElts);
3615
3616 // Determine which operand(s) are actually in use for this instruction.
3617 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3618 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3619
3620 // If needed, replace operands based on zero mask.
3621 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3622 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3623
3624 // Permute low half of result.
3625 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3626 for (unsigned i = 0; i < HalfSize; ++i)
3627 ShuffleMask[i] = StartIndex + i;
3628
3629 // Permute high half of result.
3630 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3631 for (unsigned i = 0; i < HalfSize; ++i)
3632 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3633
3634 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3635
3636 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3637 Name.starts_with("avx512.mask.vpermil.p") ||
3638 Name.starts_with("avx512.mask.pshuf.d.")) {
3639 Value *Op0 = CI->getArgOperand(0);
3640 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3641 auto *VecTy = cast<FixedVectorType>(CI->getType());
3642 unsigned NumElts = VecTy->getNumElements();
3643 // Calculate the size of each index in the immediate.
3644 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3645 unsigned IdxMask = ((1 << IdxSize) - 1);
3646
3647 SmallVector<int, 8> Idxs(NumElts);
3648 // Lookup the bits for this element, wrapping around the immediate every
3649 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3650 // to offset by the first index of each group.
3651 for (unsigned i = 0; i != NumElts; ++i)
3652 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3653
3654 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3655
3656 if (CI->arg_size() == 4)
3657 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3658 CI->getArgOperand(2));
3659 } else if (Name == "sse2.pshufl.w" ||
3660 Name.starts_with("avx512.mask.pshufl.w.")) {
3661 Value *Op0 = CI->getArgOperand(0);
3662 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3663 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3664
3665 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3666 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3667
3668 SmallVector<int, 16> Idxs(NumElts);
3669 for (unsigned l = 0; l != NumElts; l += 8) {
3670 for (unsigned i = 0; i != 4; ++i)
3671 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3672 for (unsigned i = 4; i != 8; ++i)
3673 Idxs[i + l] = i + l;
3674 }
3675
3676 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3677
3678 if (CI->arg_size() == 4)
3679 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3680 CI->getArgOperand(2));
3681 } else if (Name == "sse2.pshufh.w" ||
3682 Name.starts_with("avx512.mask.pshufh.w.")) {
3683 Value *Op0 = CI->getArgOperand(0);
3684 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3685 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3686
3687 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3688 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3689
3690 SmallVector<int, 16> Idxs(NumElts);
3691 for (unsigned l = 0; l != NumElts; l += 8) {
3692 for (unsigned i = 0; i != 4; ++i)
3693 Idxs[i + l] = i + l;
3694 for (unsigned i = 0; i != 4; ++i)
3695 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3696 }
3697
3698 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3699
3700 if (CI->arg_size() == 4)
3701 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3702 CI->getArgOperand(2));
3703 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3704 Value *Op0 = CI->getArgOperand(0);
3705 Value *Op1 = CI->getArgOperand(1);
3706 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3707 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3708
3709 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3710 unsigned HalfLaneElts = NumLaneElts / 2;
3711
3712 SmallVector<int, 16> Idxs(NumElts);
3713 for (unsigned i = 0; i != NumElts; ++i) {
3714 // Base index is the starting element of the lane.
3715 Idxs[i] = i - (i % NumLaneElts);
3716 // If we are half way through the lane switch to the other source.
3717 if ((i % NumLaneElts) >= HalfLaneElts)
3718 Idxs[i] += NumElts;
3719 // Now select the specific element. By adding HalfLaneElts bits from
3720 // the immediate. Wrapping around the immediate every 8-bits.
3721 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3722 }
3723
3724 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3725
3726 Rep =
3727 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3728 } else if (Name.starts_with("avx512.mask.movddup") ||
3729 Name.starts_with("avx512.mask.movshdup") ||
3730 Name.starts_with("avx512.mask.movsldup")) {
3731 Value *Op0 = CI->getArgOperand(0);
3732 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3733 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3734
3735 unsigned Offset = 0;
3736 if (Name.starts_with("avx512.mask.movshdup."))
3737 Offset = 1;
3738
3739 SmallVector<int, 16> Idxs(NumElts);
3740 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3741 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3742 Idxs[i + l + 0] = i + l + Offset;
3743 Idxs[i + l + 1] = i + l + Offset;
3744 }
3745
3746 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3747
3748 Rep =
3749 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3750 } else if (Name.starts_with("avx512.mask.punpckl") ||
3751 Name.starts_with("avx512.mask.unpckl.")) {
3752 Value *Op0 = CI->getArgOperand(0);
3753 Value *Op1 = CI->getArgOperand(1);
3754 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3755 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3756
3757 SmallVector<int, 64> Idxs(NumElts);
3758 for (int l = 0; l != NumElts; l += NumLaneElts)
3759 for (int i = 0; i != NumLaneElts; ++i)
3760 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3761
3762 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3763
3764 Rep =
3765 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3766 } else if (Name.starts_with("avx512.mask.punpckh") ||
3767 Name.starts_with("avx512.mask.unpckh.")) {
3768 Value *Op0 = CI->getArgOperand(0);
3769 Value *Op1 = CI->getArgOperand(1);
3770 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3771 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3772
3773 SmallVector<int, 64> Idxs(NumElts);
3774 for (int l = 0; l != NumElts; l += NumLaneElts)
3775 for (int i = 0; i != NumLaneElts; ++i)
3776 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3777
3778 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3779
3780 Rep =
3781 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3782 } else if (Name.starts_with("avx512.mask.and.") ||
3783 Name.starts_with("avx512.mask.pand.")) {
3784 VectorType *FTy = cast<VectorType>(CI->getType());
3786 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3787 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3788 Rep = Builder.CreateBitCast(Rep, FTy);
3789 Rep =
3790 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3791 } else if (Name.starts_with("avx512.mask.andn.") ||
3792 Name.starts_with("avx512.mask.pandn.")) {
3793 VectorType *FTy = cast<VectorType>(CI->getType());
3795 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3796 Rep = Builder.CreateAnd(Rep,
3797 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3798 Rep = Builder.CreateBitCast(Rep, FTy);
3799 Rep =
3800 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3801 } else if (Name.starts_with("avx512.mask.or.") ||
3802 Name.starts_with("avx512.mask.por.")) {
3803 VectorType *FTy = cast<VectorType>(CI->getType());
3805 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3806 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3807 Rep = Builder.CreateBitCast(Rep, FTy);
3808 Rep =
3809 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3810 } else if (Name.starts_with("avx512.mask.xor.") ||
3811 Name.starts_with("avx512.mask.pxor.")) {
3812 VectorType *FTy = cast<VectorType>(CI->getType());
3814 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3815 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3816 Rep = Builder.CreateBitCast(Rep, FTy);
3817 Rep =
3818 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3819 } else if (Name.starts_with("avx512.mask.padd.")) {
3820 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3821 Rep =
3822 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3823 } else if (Name.starts_with("avx512.mask.psub.")) {
3824 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3825 Rep =
3826 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3827 } else if (Name.starts_with("avx512.mask.pmull.")) {
3828 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3829 Rep =
3830 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3831 } else if (Name.starts_with("avx512.mask.add.p")) {
3832 if (Name.ends_with(".512")) {
3833 Intrinsic::ID IID;
3834 if (Name[17] == 's')
3835 IID = Intrinsic::x86_avx512_add_ps_512;
3836 else
3837 IID = Intrinsic::x86_avx512_add_pd_512;
3838
3839 Rep = Builder.CreateIntrinsic(
3840 IID,
3841 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3842 } else {
3843 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3844 }
3845 Rep =
3846 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3847 } else if (Name.starts_with("avx512.mask.div.p")) {
3848 if (Name.ends_with(".512")) {
3849 Intrinsic::ID IID;
3850 if (Name[17] == 's')
3851 IID = Intrinsic::x86_avx512_div_ps_512;
3852 else
3853 IID = Intrinsic::x86_avx512_div_pd_512;
3854
3855 Rep = Builder.CreateIntrinsic(
3856 IID,
3857 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3858 } else {
3859 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3860 }
3861 Rep =
3862 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3863 } else if (Name.starts_with("avx512.mask.mul.p")) {
3864 if (Name.ends_with(".512")) {
3865 Intrinsic::ID IID;
3866 if (Name[17] == 's')
3867 IID = Intrinsic::x86_avx512_mul_ps_512;
3868 else
3869 IID = Intrinsic::x86_avx512_mul_pd_512;
3870
3871 Rep = Builder.CreateIntrinsic(
3872 IID,
3873 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3874 } else {
3875 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3876 }
3877 Rep =
3878 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3879 } else if (Name.starts_with("avx512.mask.sub.p")) {
3880 if (Name.ends_with(".512")) {
3881 Intrinsic::ID IID;
3882 if (Name[17] == 's')
3883 IID = Intrinsic::x86_avx512_sub_ps_512;
3884 else
3885 IID = Intrinsic::x86_avx512_sub_pd_512;
3886
3887 Rep = Builder.CreateIntrinsic(
3888 IID,
3889 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3890 } else {
3891 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3892 }
3893 Rep =
3894 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3895 } else if ((Name.starts_with("avx512.mask.max.p") ||
3896 Name.starts_with("avx512.mask.min.p")) &&
3897 Name.drop_front(18) == ".512") {
3898 bool IsDouble = Name[17] == 'd';
3899 bool IsMin = Name[13] == 'i';
3900 static const Intrinsic::ID MinMaxTbl[2][2] = {
3901 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3902 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3903 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3904
3905 Rep = Builder.CreateIntrinsic(
3906 IID,
3907 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3908 Rep =
3909 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3910 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3911 Rep =
3912 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3913 {CI->getArgOperand(0), Builder.getInt1(false)});
3914 Rep =
3915 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3916 } else if (Name.starts_with("avx512.mask.psll")) {
3917 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3918 bool IsVariable = Name[16] == 'v';
3919 char Size = Name[16] == '.' ? Name[17]
3920 : Name[17] == '.' ? Name[18]
3921 : Name[18] == '.' ? Name[19]
3922 : Name[20];
3923
3924 Intrinsic::ID IID;
3925 if (IsVariable && Name[17] != '.') {
3926 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3927 IID = Intrinsic::x86_avx2_psllv_q;
3928 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3929 IID = Intrinsic::x86_avx2_psllv_q_256;
3930 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3931 IID = Intrinsic::x86_avx2_psllv_d;
3932 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3933 IID = Intrinsic::x86_avx2_psllv_d_256;
3934 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3935 IID = Intrinsic::x86_avx512_psllv_w_128;
3936 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3937 IID = Intrinsic::x86_avx512_psllv_w_256;
3938 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3939 IID = Intrinsic::x86_avx512_psllv_w_512;
3940 else
3941 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3942 } else if (Name.ends_with(".128")) {
3943 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3944 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3945 : Intrinsic::x86_sse2_psll_d;
3946 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3947 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3948 : Intrinsic::x86_sse2_psll_q;
3949 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3950 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3951 : Intrinsic::x86_sse2_psll_w;
3952 else
3953 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3954 } else if (Name.ends_with(".256")) {
3955 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3956 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3957 : Intrinsic::x86_avx2_psll_d;
3958 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3959 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3960 : Intrinsic::x86_avx2_psll_q;
3961 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3962 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3963 : Intrinsic::x86_avx2_psll_w;
3964 else
3965 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3966 } else {
3967 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3968 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3969 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3970 : Intrinsic::x86_avx512_psll_d_512;
3971 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3972 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3973 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3974 : Intrinsic::x86_avx512_psll_q_512;
3975 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3976 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3977 : Intrinsic::x86_avx512_psll_w_512;
3978 else
3979 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3980 }
3981
3982 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3983 } else if (Name.starts_with("avx512.mask.psrl")) {
3984 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3985 bool IsVariable = Name[16] == 'v';
3986 char Size = Name[16] == '.' ? Name[17]
3987 : Name[17] == '.' ? Name[18]
3988 : Name[18] == '.' ? Name[19]
3989 : Name[20];
3990
3991 Intrinsic::ID IID;
3992 if (IsVariable && Name[17] != '.') {
3993 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3994 IID = Intrinsic::x86_avx2_psrlv_q;
3995 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3996 IID = Intrinsic::x86_avx2_psrlv_q_256;
3997 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3998 IID = Intrinsic::x86_avx2_psrlv_d;
3999 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
4000 IID = Intrinsic::x86_avx2_psrlv_d_256;
4001 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
4002 IID = Intrinsic::x86_avx512_psrlv_w_128;
4003 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
4004 IID = Intrinsic::x86_avx512_psrlv_w_256;
4005 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
4006 IID = Intrinsic::x86_avx512_psrlv_w_512;
4007 else
4008 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4009 } else if (Name.ends_with(".128")) {
4010 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
4011 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4012 : Intrinsic::x86_sse2_psrl_d;
4013 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
4014 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4015 : Intrinsic::x86_sse2_psrl_q;
4016 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
4017 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4018 : Intrinsic::x86_sse2_psrl_w;
4019 else
4020 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4021 } else if (Name.ends_with(".256")) {
4022 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4023 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4024 : Intrinsic::x86_avx2_psrl_d;
4025 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4026 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4027 : Intrinsic::x86_avx2_psrl_q;
4028 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4029 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4030 : Intrinsic::x86_avx2_psrl_w;
4031 else
4032 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4033 } else {
4034 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4035 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4036 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4037 : Intrinsic::x86_avx512_psrl_d_512;
4038 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4039 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4040 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4041 : Intrinsic::x86_avx512_psrl_q_512;
4042 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
4043 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4044 : Intrinsic::x86_avx512_psrl_w_512;
4045 else
4046 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4047 }
4048
4049 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4050 } else if (Name.starts_with("avx512.mask.psra")) {
4051 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4052 bool IsVariable = Name[16] == 'v';
4053 char Size = Name[16] == '.' ? Name[17]
4054 : Name[17] == '.' ? Name[18]
4055 : Name[18] == '.' ? Name[19]
4056 : Name[20];
4057
4058 Intrinsic::ID IID;
4059 if (IsVariable && Name[17] != '.') {
4060 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4061 IID = Intrinsic::x86_avx2_psrav_d;
4062 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4063 IID = Intrinsic::x86_avx2_psrav_d_256;
4064 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4065 IID = Intrinsic::x86_avx512_psrav_w_128;
4066 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4067 IID = Intrinsic::x86_avx512_psrav_w_256;
4068 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4069 IID = Intrinsic::x86_avx512_psrav_w_512;
4070 else
4071 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4072 } else if (Name.ends_with(".128")) {
4073 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4074 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4075 : Intrinsic::x86_sse2_psra_d;
4076 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4077 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4078 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4079 : Intrinsic::x86_avx512_psra_q_128;
4080 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4081 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4082 : Intrinsic::x86_sse2_psra_w;
4083 else
4084 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4085 } else if (Name.ends_with(".256")) {
4086 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4087 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4088 : Intrinsic::x86_avx2_psra_d;
4089 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4090 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4091 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4092 : Intrinsic::x86_avx512_psra_q_256;
4093 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4094 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4095 : Intrinsic::x86_avx2_psra_w;
4096 else
4097 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4098 } else {
4099 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4100 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4101 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4102 : Intrinsic::x86_avx512_psra_d_512;
4103 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4104 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4105 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4106 : Intrinsic::x86_avx512_psra_q_512;
4107 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4108 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4109 : Intrinsic::x86_avx512_psra_w_512;
4110 else
4111 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4112 }
4113
4114 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4115 } else if (Name.starts_with("avx512.mask.move.s")) {
4116 Rep = upgradeMaskedMove(Builder, *CI);
4117 } else if (Name.starts_with("avx512.cvtmask2")) {
4118 Rep = upgradeMaskToInt(Builder, *CI);
4119 } else if (Name.ends_with(".movntdqa")) {
4121 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4122
4123 LoadInst *LI = Builder.CreateAlignedLoad(
4124 CI->getType(), CI->getArgOperand(0),
4126 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4127 Rep = LI;
4128 } else if (Name.starts_with("fma.vfmadd.") ||
4129 Name.starts_with("fma.vfmsub.") ||
4130 Name.starts_with("fma.vfnmadd.") ||
4131 Name.starts_with("fma.vfnmsub.")) {
4132 bool NegMul = Name[6] == 'n';
4133 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4134 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4135
4136 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4137 CI->getArgOperand(2)};
4138
4139 if (IsScalar) {
4140 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4141 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4142 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4143 }
4144
4145 if (NegMul && !IsScalar)
4146 Ops[0] = Builder.CreateFNeg(Ops[0]);
4147 if (NegMul && IsScalar)
4148 Ops[1] = Builder.CreateFNeg(Ops[1]);
4149 if (NegAcc)
4150 Ops[2] = Builder.CreateFNeg(Ops[2]);
4151
4152 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4153
4154 if (IsScalar)
4155 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4156 } else if (Name.starts_with("fma4.vfmadd.s")) {
4157 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4158 CI->getArgOperand(2)};
4159
4160 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4161 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4162 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4163
4164 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4165
4166 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4167 Rep, (uint64_t)0);
4168 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4169 Name.starts_with("avx512.maskz.vfmadd.s") ||
4170 Name.starts_with("avx512.mask3.vfmadd.s") ||
4171 Name.starts_with("avx512.mask3.vfmsub.s") ||
4172 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4173 bool IsMask3 = Name[11] == '3';
4174 bool IsMaskZ = Name[11] == 'z';
4175 // Drop the "avx512.mask." to make it easier.
4176 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4177 bool NegMul = Name[2] == 'n';
4178 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4179
4180 Value *A = CI->getArgOperand(0);
4181 Value *B = CI->getArgOperand(1);
4182 Value *C = CI->getArgOperand(2);
4183
4184 if (NegMul && (IsMask3 || IsMaskZ))
4185 A = Builder.CreateFNeg(A);
4186 if (NegMul && !(IsMask3 || IsMaskZ))
4187 B = Builder.CreateFNeg(B);
4188 if (NegAcc)
4189 C = Builder.CreateFNeg(C);
4190
4191 A = Builder.CreateExtractElement(A, (uint64_t)0);
4192 B = Builder.CreateExtractElement(B, (uint64_t)0);
4193 C = Builder.CreateExtractElement(C, (uint64_t)0);
4194
4195 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4196 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4197 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4198
4199 Intrinsic::ID IID;
4200 if (Name.back() == 'd')
4201 IID = Intrinsic::x86_avx512_vfmadd_f64;
4202 else
4203 IID = Intrinsic::x86_avx512_vfmadd_f32;
4204 Rep = Builder.CreateIntrinsic(IID, Ops);
4205 } else {
4206 Rep = Builder.CreateFMA(A, B, C);
4207 }
4208
4209 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4210 : IsMask3 ? C
4211 : A;
4212
4213 // For Mask3 with NegAcc, we need to create a new extractelement that
4214 // avoids the negation above.
4215 if (NegAcc && IsMask3)
4216 PassThru =
4217 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4218
4219 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4220 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4221 (uint64_t)0);
4222 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4223 Name.starts_with("avx512.mask.vfnmadd.p") ||
4224 Name.starts_with("avx512.mask.vfnmsub.p") ||
4225 Name.starts_with("avx512.mask3.vfmadd.p") ||
4226 Name.starts_with("avx512.mask3.vfmsub.p") ||
4227 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4228 Name.starts_with("avx512.maskz.vfmadd.p")) {
4229 bool IsMask3 = Name[11] == '3';
4230 bool IsMaskZ = Name[11] == 'z';
4231 // Drop the "avx512.mask." to make it easier.
4232 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4233 bool NegMul = Name[2] == 'n';
4234 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4235
4236 Value *A = CI->getArgOperand(0);
4237 Value *B = CI->getArgOperand(1);
4238 Value *C = CI->getArgOperand(2);
4239
4240 if (NegMul && (IsMask3 || IsMaskZ))
4241 A = Builder.CreateFNeg(A);
4242 if (NegMul && !(IsMask3 || IsMaskZ))
4243 B = Builder.CreateFNeg(B);
4244 if (NegAcc)
4245 C = Builder.CreateFNeg(C);
4246
4247 if (CI->arg_size() == 5 &&
4248 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4249 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4250 Intrinsic::ID IID;
4251 // Check the character before ".512" in string.
4252 if (Name[Name.size() - 5] == 's')
4253 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4254 else
4255 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4256
4257 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4258 } else {
4259 Rep = Builder.CreateFMA(A, B, C);
4260 }
4261
4262 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4263 : IsMask3 ? CI->getArgOperand(2)
4264 : CI->getArgOperand(0);
4265
4266 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4267 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4268 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4269 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4270 Intrinsic::ID IID;
4271 if (VecWidth == 128 && EltWidth == 32)
4272 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4273 else if (VecWidth == 256 && EltWidth == 32)
4274 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4275 else if (VecWidth == 128 && EltWidth == 64)
4276 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4277 else if (VecWidth == 256 && EltWidth == 64)
4278 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4279 else
4280 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4281
4282 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4283 CI->getArgOperand(2)};
4284 Ops[2] = Builder.CreateFNeg(Ops[2]);
4285 Rep = Builder.CreateIntrinsic(IID, Ops);
4286 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4287 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4288 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4289 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4290 bool IsMask3 = Name[11] == '3';
4291 bool IsMaskZ = Name[11] == 'z';
4292 // Drop the "avx512.mask." to make it easier.
4293 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4294 bool IsSubAdd = Name[3] == 's';
4295 if (CI->arg_size() == 5) {
4296 Intrinsic::ID IID;
4297 // Check the character before ".512" in string.
4298 if (Name[Name.size() - 5] == 's')
4299 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4300 else
4301 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4302
4303 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4304 CI->getArgOperand(2), CI->getArgOperand(4)};
4305 if (IsSubAdd)
4306 Ops[2] = Builder.CreateFNeg(Ops[2]);
4307
4308 Rep = Builder.CreateIntrinsic(IID, Ops);
4309 } else {
4310 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4311
4312 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4313 CI->getArgOperand(2)};
4314
4316 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4317 Value *Odd = Builder.CreateCall(FMA, Ops);
4318 Ops[2] = Builder.CreateFNeg(Ops[2]);
4319 Value *Even = Builder.CreateCall(FMA, Ops);
4320
4321 if (IsSubAdd)
4322 std::swap(Even, Odd);
4323
4324 SmallVector<int, 32> Idxs(NumElts);
4325 for (int i = 0; i != NumElts; ++i)
4326 Idxs[i] = i + (i % 2) * NumElts;
4327
4328 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4329 }
4330
4331 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4332 : IsMask3 ? CI->getArgOperand(2)
4333 : CI->getArgOperand(0);
4334
4335 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4336 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4337 Name.starts_with("avx512.maskz.pternlog.")) {
4338 bool ZeroMask = Name[11] == 'z';
4339 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4340 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4341 Intrinsic::ID IID;
4342 if (VecWidth == 128 && EltWidth == 32)
4343 IID = Intrinsic::x86_avx512_pternlog_d_128;
4344 else if (VecWidth == 256 && EltWidth == 32)
4345 IID = Intrinsic::x86_avx512_pternlog_d_256;
4346 else if (VecWidth == 512 && EltWidth == 32)
4347 IID = Intrinsic::x86_avx512_pternlog_d_512;
4348 else if (VecWidth == 128 && EltWidth == 64)
4349 IID = Intrinsic::x86_avx512_pternlog_q_128;
4350 else if (VecWidth == 256 && EltWidth == 64)
4351 IID = Intrinsic::x86_avx512_pternlog_q_256;
4352 else if (VecWidth == 512 && EltWidth == 64)
4353 IID = Intrinsic::x86_avx512_pternlog_q_512;
4354 else
4355 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4356
4357 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4358 CI->getArgOperand(2), CI->getArgOperand(3)};
4359 Rep = Builder.CreateIntrinsic(IID, Args);
4360 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4361 : CI->getArgOperand(0);
4362 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4363 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4364 Name.starts_with("avx512.maskz.vpmadd52")) {
4365 bool ZeroMask = Name[11] == 'z';
4366 bool High = Name[20] == 'h' || Name[21] == 'h';
4367 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4368 Intrinsic::ID IID;
4369 if (VecWidth == 128 && !High)
4370 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4371 else if (VecWidth == 256 && !High)
4372 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4373 else if (VecWidth == 512 && !High)
4374 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4375 else if (VecWidth == 128 && High)
4376 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4377 else if (VecWidth == 256 && High)
4378 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4379 else if (VecWidth == 512 && High)
4380 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4381 else
4382 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4383
4384 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4385 CI->getArgOperand(2)};
4386 Rep = Builder.CreateIntrinsic(IID, Args);
4387 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4388 : CI->getArgOperand(0);
4389 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4390 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4391 Name.starts_with("avx512.mask.vpermt2var.") ||
4392 Name.starts_with("avx512.maskz.vpermt2var.")) {
4393 bool ZeroMask = Name[11] == 'z';
4394 bool IndexForm = Name[17] == 'i';
4395 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4396 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4397 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4398 Name.starts_with("avx512.mask.vpdpbusds.") ||
4399 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4400 bool ZeroMask = Name[11] == 'z';
4401 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4402 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4403 Intrinsic::ID IID;
4404 if (VecWidth == 128 && !IsSaturating)
4405 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4406 else if (VecWidth == 256 && !IsSaturating)
4407 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4408 else if (VecWidth == 512 && !IsSaturating)
4409 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4410 else if (VecWidth == 128 && IsSaturating)
4411 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4412 else if (VecWidth == 256 && IsSaturating)
4413 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4414 else if (VecWidth == 512 && IsSaturating)
4415 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4416 else
4417 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4418
4419 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4420 CI->getArgOperand(2)};
4421
4422 // Input arguments types were incorrectly set to vectors of i32 before but
4423 // they should be vectors of i8. Insert bit cast when encountering the old
4424 // types
4425 if (Args[1]->getType()->isVectorTy() &&
4426 cast<VectorType>(Args[1]->getType())
4427 ->getElementType()
4428 ->isIntegerTy(32) &&
4429 Args[2]->getType()->isVectorTy() &&
4430 cast<VectorType>(Args[2]->getType())
4431 ->getElementType()
4432 ->isIntegerTy(32)) {
4433 Type *NewArgType = nullptr;
4434 if (VecWidth == 128)
4435 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4436 else if (VecWidth == 256)
4437 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4438 else if (VecWidth == 512)
4439 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4440 else
4441 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4442 CI);
4443
4444 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4445 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4446 }
4447
4448 Rep = Builder.CreateIntrinsic(IID, Args);
4449 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4450 : CI->getArgOperand(0);
4451 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4452 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4453 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4454 Name.starts_with("avx512.mask.vpdpwssds.") ||
4455 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4456 bool ZeroMask = Name[11] == 'z';
4457 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4458 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4459 Intrinsic::ID IID;
4460 if (VecWidth == 128 && !IsSaturating)
4461 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4462 else if (VecWidth == 256 && !IsSaturating)
4463 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4464 else if (VecWidth == 512 && !IsSaturating)
4465 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4466 else if (VecWidth == 128 && IsSaturating)
4467 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4468 else if (VecWidth == 256 && IsSaturating)
4469 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4470 else if (VecWidth == 512 && IsSaturating)
4471 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4472 else
4473 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4474
4475 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4476 CI->getArgOperand(2)};
4477
4478 // Input arguments types were incorrectly set to vectors of i32 before but
4479 // they should be vectors of i16. Insert bit cast when encountering the old
4480 // types
4481 if (Args[1]->getType()->isVectorTy() &&
4482 cast<VectorType>(Args[1]->getType())
4483 ->getElementType()
4484 ->isIntegerTy(32) &&
4485 Args[2]->getType()->isVectorTy() &&
4486 cast<VectorType>(Args[2]->getType())
4487 ->getElementType()
4488 ->isIntegerTy(32)) {
4489 Type *NewArgType = nullptr;
4490 if (VecWidth == 128)
4491 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4492 else if (VecWidth == 256)
4493 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4494 else if (VecWidth == 512)
4495 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4496 else
4497 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4498 CI);
4499
4500 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4501 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4502 }
4503
4504 Rep = Builder.CreateIntrinsic(IID, Args);
4505 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4506 : CI->getArgOperand(0);
4507 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4508 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4509 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4510 Name == "subborrow.u32" || Name == "subborrow.u64") {
4511 Intrinsic::ID IID;
4512 if (Name[0] == 'a' && Name.back() == '2')
4513 IID = Intrinsic::x86_addcarry_32;
4514 else if (Name[0] == 'a' && Name.back() == '4')
4515 IID = Intrinsic::x86_addcarry_64;
4516 else if (Name[0] == 's' && Name.back() == '2')
4517 IID = Intrinsic::x86_subborrow_32;
4518 else if (Name[0] == 's' && Name.back() == '4')
4519 IID = Intrinsic::x86_subborrow_64;
4520 else
4521 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4522
4523 // Make a call with 3 operands.
4524 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4525 CI->getArgOperand(2)};
4526 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4527
4528 // Extract the second result and store it.
4529 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4530 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4531 // Replace the original call result with the first result of the new call.
4532 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4533
4534 CI->replaceAllUsesWith(CF);
4535 Rep = nullptr;
4536 } else if (Name.starts_with("avx512.mask.") &&
4537 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4538 // Rep will be updated by the call in the condition.
4539 } else
4540 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4541
4542 return Rep;
4543}
4544
4546 Function *F, IRBuilder<> &Builder) {
4547 if (Name.starts_with("neon.bfcvt")) {
4548 if (Name.starts_with("neon.bfcvtn2")) {
4549 SmallVector<int, 32> LoMask(4);
4550 std::iota(LoMask.begin(), LoMask.end(), 0);
4551 SmallVector<int, 32> ConcatMask(8);
4552 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4553 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4554 Value *Trunc =
4555 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4556 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4557 } else if (Name.starts_with("neon.bfcvtn")) {
4558 SmallVector<int, 32> ConcatMask(8);
4559 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4560 Type *V4BF16 =
4561 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4562 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4563 dbgs() << "Trunc: " << *Trunc << "\n";
4564 return Builder.CreateShuffleVector(
4565 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4566 } else {
4567 return Builder.CreateFPTrunc(CI->getOperand(0),
4568 Type::getBFloatTy(F->getContext()));
4569 }
4570 } else if (Name.starts_with("sve.fcvt")) {
4571 Intrinsic::ID NewID =
4573 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4574 .Case("sve.fcvtnt.bf16f32",
4575 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4577 if (NewID == Intrinsic::not_intrinsic)
4578 llvm_unreachable("Unhandled Intrinsic!");
4579
4580 SmallVector<Value *, 3> Args(CI->args());
4581
4582 // The original intrinsics incorrectly used a predicate based on the
4583 // smallest element type rather than the largest.
4584 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4585 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4586
4587 if (Args[1]->getType() != BadPredTy)
4588 llvm_unreachable("Unexpected predicate type!");
4589
4590 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4591 BadPredTy, Args[1]);
4592 Args[1] = Builder.CreateIntrinsic(
4593 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4594
4595 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4596 CI->getName());
4597 }
4598
4599 llvm_unreachable("Unhandled Intrinsic!");
4600}
4601
                                      IRBuilder<> &Builder) {
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
                                          CI->getArgOperand(0),
                                          /*FMFSource=*/nullptr, CI->getName());
    Value *C1 = Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_v2i,
        {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_i2v,
        {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    // These intrinsics were declared with a v4i1 predicate overload; rebuild
    // the overload type list substituting the v2i1 predicate type expected by
    // the current intrinsic definitions.
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(),
             CI->getOperand(1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
             CI->getOperand(2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    // Convert every i1-vector (predicate) operand from v4i1 to v2i1 by
    // round-tripping through the MVE predicate <-> i32 conversion intrinsics.
    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateIntrinsic(
            Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
        Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
      }
      Ops.push_back(Op);
    }

    // Re-emit the same intrinsic with the corrected overload types.
    return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
                                   CI->getName());
  }
  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
4686
4687// These are expected to have the arguments:
4688// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4689//
4690// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4691//
4693 Function *F, IRBuilder<> &Builder) {
4694 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4695 // for compatibility.
4696 auto UpgradeLegacyWMMAIUIntrinsicCall =
4697 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4698 ArrayRef<Type *> OverloadTys) -> Value * {
4699 // Prepare arguments, append clamp=0 for compatibility
4700 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4701 Args.push_back(Builder.getFalse());
4702
4703 // Insert the declaration for the right overload types
4705 F->getParent(), F->getIntrinsicID(), OverloadTys);
4706
4707 // Copy operand bundles if any
4709 CI->getOperandBundlesAsDefs(Bundles);
4710
4711 // Create the new call and copy calling properties
4712 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4713 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4714 NewCall->setCallingConv(CI->getCallingConv());
4715 NewCall->setAttributes(CI->getAttributes());
4716 NewCall->setDebugLoc(CI->getDebugLoc());
4717 NewCall->copyMetadata(*CI);
4718 return NewCall;
4719 };
4720
4721 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4722 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4723 "intrinsic should have 7 arguments");
4724 Type *T1 = CI->getArgOperand(4)->getType();
4725 Type *T2 = CI->getArgOperand(1)->getType();
4726 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4727 }
4728 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4729 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4730 "intrinsic should have 8 arguments");
4731 Type *T1 = CI->getArgOperand(4)->getType();
4732 Type *T2 = CI->getArgOperand(1)->getType();
4733 Type *T3 = CI->getArgOperand(3)->getType();
4734 Type *T4 = CI->getArgOperand(5)->getType();
4735 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4736 }
4737
4738 switch (F->getIntrinsicID()) {
4739 default:
4740 break;
4741 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4742 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4743 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4744 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4745 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4746 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4747 // Drop src0 and src1 modifiers.
4748 const Value *Op0 = CI->getArgOperand(0);
4749 const Value *Op2 = CI->getArgOperand(2);
4750 assert(Op0->getType()->isIntegerTy() && Op2->getType()->isIntegerTy());
4751 const ConstantInt *ModA = dyn_cast<ConstantInt>(Op0);
4752 const ConstantInt *ModB = dyn_cast<ConstantInt>(Op2);
4753 if (!ModA->isZero() || !ModB->isZero())
4754 reportFatalUsageError(Name + " matrix A and B modifiers shall be zero");
4755
4757 for (int I = 4, E = CI->arg_size(); I < E; ++I)
4758 Args.push_back(CI->getArgOperand(I));
4759
4760 SmallVector<Type *, 3> Overloads{F->getReturnType(), Args[0]->getType()};
4761 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4762 Overloads.push_back(Args[3]->getType());
4764 F->getParent(), F->getIntrinsicID(), Overloads);
4765
4767 CI->getOperandBundlesAsDefs(Bundles);
4768
4769 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4770 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4771 NewCall->setCallingConv(CI->getCallingConv());
4772 NewCall->setAttributes(CI->getAttributes());
4773 NewCall->setDebugLoc(CI->getDebugLoc());
4774 NewCall->copyMetadata(*CI);
4775 NewCall->takeName(CI);
4776 return NewCall;
4777 }
4778 }
4779
4780 AtomicRMWInst::BinOp RMWOp =
4782 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4783 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4784 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4785 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4786 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4787 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4788 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4789 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4790 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4791 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4792 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4793 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4794 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4795
4796 unsigned NumOperands = CI->getNumOperands();
4797 if (NumOperands < 3) // Malformed bitcode.
4798 return nullptr;
4799
4800 Value *Ptr = CI->getArgOperand(0);
4801 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4802 if (!PtrTy) // Malformed.
4803 return nullptr;
4804
4805 Value *Val = CI->getArgOperand(1);
4806 if (Val->getType() != CI->getType()) // Malformed.
4807 return nullptr;
4808
4809 ConstantInt *OrderArg = nullptr;
4810 bool IsVolatile = false;
4811
4812 // These should have 5 arguments (plus the callee). A separate version of the
4813 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4814 if (NumOperands > 3)
4815 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4816
4817 // Ignore scope argument at 3
4818
4819 if (NumOperands > 5) {
4820 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4821 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4822 }
4823
4825 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4826 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4829
4830 LLVMContext &Ctx = F->getContext();
4831
4832 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4833 Type *RetTy = CI->getType();
4834 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4835 if (VT->getElementType()->isIntegerTy(16)) {
4836 VectorType *AsBF16 =
4837 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4838 Val = Builder.CreateBitCast(Val, AsBF16);
4839 }
4840 }
4841
4842 // The scope argument never really worked correctly. Use agent as the most
4843 // conservative option which should still always produce the instruction.
4844 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4845 AtomicRMWInst *RMW =
4846 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4847
4848 unsigned AddrSpace = PtrTy->getAddressSpace();
4849 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4850 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4851 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4852 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4853 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4854 }
4855
4856 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4857 MDBuilder MDB(F->getContext());
4858 MDNode *RangeNotPrivate =
4861 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4862 }
4863
4864 if (IsVolatile)
4865 RMW->setVolatile(true);
4866
4867 return Builder.CreateBitCast(RMW, RetTy);
4868}
4869
4870/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4871/// plain MDNode, as it's the verifier's job to check these are the correct
4872/// types later.
4873static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4874 if (Op < CI->arg_size()) {
4875 if (MetadataAsValue *MAV =
4877 Metadata *MD = MAV->getMetadata();
4878 return dyn_cast_if_present<MDNode>(MD);
4879 }
4880 }
4881 return nullptr;
4882}
4883
4884/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4885static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4886 if (Op < CI->arg_size())
4888 return MAV->getMetadata();
4889 return nullptr;
4890}
4891
4893 // The MDNode attached to this instruction might not be the correct type,
4894 // as the verifier has not yet be run. Fetch it as a bare MDNode.
4895 return I->getDebugLoc().getAsMDNode();
4896}
4897
/// Convert debug intrinsic calls to non-instruction debug records.
/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
/// \p CI - The debug intrinsic call.
  DbgRecord *DR = nullptr;
  if (Name == "label") {
                             CI->getDebugLoc());
  } else if (Name == "assign") {
    // Forward the dbg.assign operands unresolved; the verifier checks their
    // types later (see unwrapMAVOp).
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
        unwrapMAVMetadataOp(CI, 4),
        /*The address is a Value ref, it will be stored as a Metadata */
        unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
  } else if (Name == "declare") {
    // dbg.declare has no assign-ID or address components.
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
        getDebugLocSafe(CI));
  } else if (Name == "addr") {
    // Upgrade dbg.addr to dbg.value with DW_OP_deref.
    MDNode *ExprNode = unwrapMAVOp(CI, 2);
    // Don't try to add something to the expression if it's not an expression.
    // Instead, allow the verifier to fail later.
    if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
      ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
    }
        unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
        getDebugLocSafe(CI));
  } else if (Name == "value") {
    // An old version of dbg.value had an extra offset argument.
    unsigned VarOp = 1;
    unsigned ExprOp = 2;
    if (CI->arg_size() == 4) {
      // Nonzero offset dbg.values get dropped without a replacement.
      if (!Offset || !Offset->isNullValue())
        return;
      VarOp = 2;
      ExprOp = 3;
    }
        unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
        nullptr, getDebugLocSafe(CI));
  }
  // Splice the new record in immediately before the intrinsic call it
  // replaces; the caller is responsible for deleting the call.
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
  CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
}
4950
4953 if (!Offset)
4954 reportFatalUsageError("Invalid llvm.vector.splice offset argument");
4955 int64_t OffsetVal = Offset->getSExtValue();
4956 return Builder.CreateIntrinsic(OffsetVal >= 0
4957 ? Intrinsic::vector_splice_left
4958 : Intrinsic::vector_splice_right,
4959 CI->getType(),
4960 {CI->getArgOperand(0), CI->getArgOperand(1),
4961 Builder.getInt32(std::abs(OffsetVal))});
4962}
4963
4965 Function *F, IRBuilder<> &Builder) {
4966 if (Name.starts_with("to.fp16")) {
4967 Value *Cast =
4968 Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
4969 return Builder.CreateBitCast(Cast, CI->getType());
4970 }
4971
4972 if (Name.starts_with("from.fp16")) {
4973 Value *Cast =
4974 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
4975 return Builder.CreateFPExt(Cast, CI->getType());
4976 }
4977
4978 return nullptr;
4979}
4980
4981/// Upgrade a call to an old intrinsic. All argument and return casting must be
4982/// provided to seamlessly integrate with existing context.
4984 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4985 // checks the callee's function type matches. It's likely we need to handle
4986 // type changes here.
4988 if (!F)
4989 return;
4990
4991 LLVMContext &C = CI->getContext();
4992 IRBuilder<> Builder(C);
4993 if (isa<FPMathOperator>(CI))
4994 Builder.setFastMathFlags(CI->getFastMathFlags());
4995 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4996
4997 if (!NewFn) {
4998 // Get the Function's name.
4999 StringRef Name = F->getName();
5000 if (!Name.consume_front("llvm."))
5001 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
5002
5003 bool IsX86 = Name.consume_front("x86.");
5004 bool IsNVVM = Name.consume_front("nvvm.");
5005 bool IsAArch64 = Name.consume_front("aarch64.");
5006 bool IsARM = Name.consume_front("arm.");
5007 bool IsAMDGCN = Name.consume_front("amdgcn.");
5008 bool IsDbg = Name.consume_front("dbg.");
5009 bool IsOldSplice =
5010 (Name.consume_front("experimental.vector.splice") ||
5011 Name.consume_front("vector.splice")) &&
5012 !(Name.starts_with(".left") || Name.starts_with(".right"));
5013 Value *Rep = nullptr;
5014
5015 if (!IsX86 && Name == "stackprotectorcheck") {
5016 Rep = nullptr;
5017 } else if (IsNVVM) {
5018 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
5019 } else if (IsX86) {
5020 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
5021 } else if (IsAArch64) {
5022 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
5023 } else if (IsARM) {
5024 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
5025 } else if (IsAMDGCN) {
5026 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
5027 } else if (IsDbg) {
5029 } else if (IsOldSplice) {
5030 Rep = upgradeVectorSplice(CI, Builder);
5031 } else if (Name.consume_front("convert.")) {
5032 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
5033 } else {
5034 llvm_unreachable("Unknown function for CallBase upgrade.");
5035 }
5036
5037 if (Rep)
5038 CI->replaceAllUsesWith(Rep);
5039 CI->eraseFromParent();
5040 return;
5041 }
5042
5043 const auto &DefaultCase = [&]() -> void {
5044 if (F == NewFn)
5045 return;
5046
5047 if (CI->getFunctionType() == NewFn->getFunctionType()) {
5048 // Handle generic mangling change.
5049 assert(
5050 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
5051 "Unknown function for CallBase upgrade and isn't just a name change");
5052 CI->setCalledFunction(NewFn);
5053 return;
5054 }
5055
5056 // This must be an upgrade from a named to a literal struct.
5057 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
5058 assert(OldST != NewFn->getReturnType() &&
5059 "Return type must have changed");
5060 assert(OldST->getNumElements() ==
5061 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
5062 "Must have same number of elements");
5063
5064 SmallVector<Value *> Args(CI->args());
5065 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5066 NewCI->setAttributes(CI->getAttributes());
5067 Value *Res = PoisonValue::get(OldST);
5068 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5069 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5070 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5071 }
5072 CI->replaceAllUsesWith(Res);
5073 CI->eraseFromParent();
5074 return;
5075 }
5076
5077 // We're probably about to produce something invalid. Let the verifier catch
5078 // it instead of dying here.
5079 CI->setCalledOperand(
5081 return;
5082 };
5083 CallInst *NewCall = nullptr;
5084 switch (NewFn->getIntrinsicID()) {
5085 default: {
5086 DefaultCase();
5087 return;
5088 }
5089 case Intrinsic::arm_neon_vst1:
5090 case Intrinsic::arm_neon_vst2:
5091 case Intrinsic::arm_neon_vst3:
5092 case Intrinsic::arm_neon_vst4:
5093 case Intrinsic::arm_neon_vst2lane:
5094 case Intrinsic::arm_neon_vst3lane:
5095 case Intrinsic::arm_neon_vst4lane: {
5096 SmallVector<Value *, 4> Args(CI->args());
5097 NewCall = Builder.CreateCall(NewFn, Args);
5098 break;
5099 }
5100 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5101 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5102 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5103 LLVMContext &Ctx = F->getParent()->getContext();
5104 SmallVector<Value *, 4> Args(CI->args());
5105 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
5106 cast<ConstantInt>(Args[3])->getZExtValue());
5107 NewCall = Builder.CreateCall(NewFn, Args);
5108 break;
5109 }
5110 case Intrinsic::aarch64_sve_ld3_sret:
5111 case Intrinsic::aarch64_sve_ld4_sret:
5112 case Intrinsic::aarch64_sve_ld2_sret: {
5113 // Is this a trivial remangle of the name to support ptr address spaces?
5114 if (isa<StructType>(F->getReturnType())) {
5115 DefaultCase();
5116 return;
5117 }
5118
5119 StringRef Name = F->getName();
5120 Name = Name.substr(5);
5121 unsigned N = StringSwitch<unsigned>(Name)
5122 .StartsWith("aarch64.sve.ld2", 2)
5123 .StartsWith("aarch64.sve.ld3", 3)
5124 .StartsWith("aarch64.sve.ld4", 4)
5125 .Default(0);
5126 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5127 unsigned MinElts = RetTy->getMinNumElements() / N;
5128 SmallVector<Value *, 2> Args(CI->args());
5129 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5130 Value *Ret = llvm::PoisonValue::get(RetTy);
5131 for (unsigned I = 0; I < N; I++) {
5132 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5133 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5134 }
5135 NewCall = dyn_cast<CallInst>(Ret);
5136 break;
5137 }
5138
5139 case Intrinsic::coro_end: {
5140 SmallVector<Value *, 3> Args(CI->args());
5141 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5142 NewCall = Builder.CreateCall(NewFn, Args);
5143 break;
5144 }
5145
5146 case Intrinsic::vector_extract: {
5147 StringRef Name = F->getName();
5148 Name = Name.substr(5); // Strip llvm
5149 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5150 DefaultCase();
5151 return;
5152 }
5153 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5154 unsigned MinElts = RetTy->getMinNumElements();
5155 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5156 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5157 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5158 break;
5159 }
5160
5161 case Intrinsic::vector_insert: {
5162 StringRef Name = F->getName();
5163 Name = Name.substr(5);
5164 if (!Name.starts_with("aarch64.sve.tuple")) {
5165 DefaultCase();
5166 return;
5167 }
5168 if (Name.starts_with("aarch64.sve.tuple.set")) {
5169 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5170 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5171 Value *NewIdx =
5172 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5173 NewCall = Builder.CreateCall(
5174 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5175 break;
5176 }
5177 if (Name.starts_with("aarch64.sve.tuple.create")) {
5178 unsigned N = StringSwitch<unsigned>(Name)
5179 .StartsWith("aarch64.sve.tuple.create2", 2)
5180 .StartsWith("aarch64.sve.tuple.create3", 3)
5181 .StartsWith("aarch64.sve.tuple.create4", 4)
5182 .Default(0);
5183 assert(N > 1 && "Create is expected to be between 2-4");
5184 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5185 Value *Ret = llvm::PoisonValue::get(RetTy);
5186 unsigned MinElts = RetTy->getMinNumElements() / N;
5187 for (unsigned I = 0; I < N; I++) {
5188 Value *V = CI->getArgOperand(I);
5189 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5190 }
5191 NewCall = dyn_cast<CallInst>(Ret);
5192 }
5193 break;
5194 }
5195
5196 case Intrinsic::arm_neon_bfdot:
5197 case Intrinsic::arm_neon_bfmmla:
5198 case Intrinsic::arm_neon_bfmlalb:
5199 case Intrinsic::arm_neon_bfmlalt:
5200 case Intrinsic::aarch64_neon_bfdot:
5201 case Intrinsic::aarch64_neon_bfmmla:
5202 case Intrinsic::aarch64_neon_bfmlalb:
5203 case Intrinsic::aarch64_neon_bfmlalt: {
5205 assert(CI->arg_size() == 3 &&
5206 "Mismatch between function args and call args");
5207 size_t OperandWidth =
5209 assert((OperandWidth == 64 || OperandWidth == 128) &&
5210 "Unexpected operand width");
5211 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5212 auto Iter = CI->args().begin();
5213 Args.push_back(*Iter++);
5214 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5215 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5216 NewCall = Builder.CreateCall(NewFn, Args);
5217 break;
5218 }
5219
5220 case Intrinsic::bitreverse:
5221 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5222 break;
5223
5224 case Intrinsic::ctlz:
5225 case Intrinsic::cttz: {
5226 if (CI->arg_size() != 1) {
5227 DefaultCase();
5228 return;
5229 }
5230
5231 NewCall =
5232 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5233 break;
5234 }
5235
5236 case Intrinsic::objectsize: {
5237 Value *NullIsUnknownSize =
5238 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5239 Value *Dynamic =
5240 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5241 NewCall = Builder.CreateCall(
5242 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5243 break;
5244 }
5245
5246 case Intrinsic::ctpop:
5247 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5248 break;
5249 case Intrinsic::dbg_value: {
5250 StringRef Name = F->getName();
5251 Name = Name.substr(5); // Strip llvm.
5252 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5253 if (Name.starts_with("dbg.addr")) {
5255 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5256 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5257 NewCall =
5258 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5259 MetadataAsValue::get(C, Expr)});
5260 break;
5261 }
5262
5263 // Upgrade from the old version that had an extra offset argument.
5264 assert(CI->arg_size() == 4);
5265 // Drop nonzero offsets instead of attempting to upgrade them.
5267 if (Offset->isNullValue()) {
5268 NewCall = Builder.CreateCall(
5269 NewFn,
5270 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5271 break;
5272 }
5273 CI->eraseFromParent();
5274 return;
5275 }
5276
5277 case Intrinsic::ptr_annotation:
5278 // Upgrade from versions that lacked the annotation attribute argument.
5279 if (CI->arg_size() != 4) {
5280 DefaultCase();
5281 return;
5282 }
5283
5284 // Create a new call with an added null annotation attribute argument.
5285 NewCall = Builder.CreateCall(
5286 NewFn,
5287 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5288 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5289 NewCall->takeName(CI);
5290 CI->replaceAllUsesWith(NewCall);
5291 CI->eraseFromParent();
5292 return;
5293
5294 case Intrinsic::var_annotation:
5295 // Upgrade from versions that lacked the annotation attribute argument.
5296 if (CI->arg_size() != 4) {
5297 DefaultCase();
5298 return;
5299 }
5300 // Create a new call with an added null annotation attribute argument.
5301 NewCall = Builder.CreateCall(
5302 NewFn,
5303 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5304 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5305 NewCall->takeName(CI);
5306 CI->replaceAllUsesWith(NewCall);
5307 CI->eraseFromParent();
5308 return;
5309
5310 case Intrinsic::riscv_aes32dsi:
5311 case Intrinsic::riscv_aes32dsmi:
5312 case Intrinsic::riscv_aes32esi:
5313 case Intrinsic::riscv_aes32esmi:
5314 case Intrinsic::riscv_sm4ks:
5315 case Intrinsic::riscv_sm4ed: {
5316 // The last argument to these intrinsics used to be i8 and changed to i32.
5317 // The type overload for sm4ks and sm4ed was removed.
5318 Value *Arg2 = CI->getArgOperand(2);
5319 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5320 return;
5321
5322 Value *Arg0 = CI->getArgOperand(0);
5323 Value *Arg1 = CI->getArgOperand(1);
5324 if (CI->getType()->isIntegerTy(64)) {
5325 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5326 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5327 }
5328
5329 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5330 cast<ConstantInt>(Arg2)->getZExtValue());
5331
5332 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5333 Value *Res = NewCall;
5334 if (Res->getType() != CI->getType())
5335 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5336 NewCall->takeName(CI);
5337 CI->replaceAllUsesWith(Res);
5338 CI->eraseFromParent();
5339 return;
5340 }
5341 case Intrinsic::nvvm_mapa_shared_cluster: {
5342 // Create a new call with the correct address space.
5343 NewCall =
5344 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5345 Value *Res = NewCall;
5346 Res = Builder.CreateAddrSpaceCast(
5347 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5348 NewCall->takeName(CI);
5349 CI->replaceAllUsesWith(Res);
5350 CI->eraseFromParent();
5351 return;
5352 }
5353 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5354 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5355 // Create a new call with the correct address space.
5356 SmallVector<Value *, 4> Args(CI->args());
5357 Args[0] = Builder.CreateAddrSpaceCast(
5358 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5359
5360 NewCall = Builder.CreateCall(NewFn, Args);
5361 NewCall->takeName(CI);
5362 CI->replaceAllUsesWith(NewCall);
5363 CI->eraseFromParent();
5364 return;
5365 }
5366 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5367 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5368 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5369 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5370 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5371 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5372 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5373 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5374 SmallVector<Value *, 16> Args(CI->args());
5375
5376 // Create AddrSpaceCast to shared_cluster if needed.
5377 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5378 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5380 Args[0] = Builder.CreateAddrSpaceCast(
5381 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5382
5383 // Attach the flag argument for cta_group, with a
5384 // default value of 0. This handles case (2) in
5385 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5386 size_t NumArgs = CI->arg_size();
5387 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5388 if (!FlagArg->getType()->isIntegerTy(1))
5389 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5390
5391 NewCall = Builder.CreateCall(NewFn, Args);
5392 NewCall->takeName(CI);
5393 CI->replaceAllUsesWith(NewCall);
5394 CI->eraseFromParent();
5395 return;
5396 }
5397 case Intrinsic::riscv_sha256sig0:
5398 case Intrinsic::riscv_sha256sig1:
5399 case Intrinsic::riscv_sha256sum0:
5400 case Intrinsic::riscv_sha256sum1:
5401 case Intrinsic::riscv_sm3p0:
5402 case Intrinsic::riscv_sm3p1: {
5403 // The last argument to these intrinsics used to be i8 and changed to i32.
5404 // The type overload for sm4ks and sm4ed was removed.
5405 if (!CI->getType()->isIntegerTy(64))
5406 return;
5407
5408 Value *Arg =
5409 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5410
5411 NewCall = Builder.CreateCall(NewFn, Arg);
5412 Value *Res =
5413 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5414 NewCall->takeName(CI);
5415 CI->replaceAllUsesWith(Res);
5416 CI->eraseFromParent();
5417 return;
5418 }
5419
5420 case Intrinsic::x86_xop_vfrcz_ss:
5421 case Intrinsic::x86_xop_vfrcz_sd:
5422 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5423 break;
5424
5425 case Intrinsic::x86_xop_vpermil2pd:
5426 case Intrinsic::x86_xop_vpermil2ps:
5427 case Intrinsic::x86_xop_vpermil2pd_256:
5428 case Intrinsic::x86_xop_vpermil2ps_256: {
5429 SmallVector<Value *, 4> Args(CI->args());
5430 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5431 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5432 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5433 NewCall = Builder.CreateCall(NewFn, Args);
5434 break;
5435 }
5436
5437 case Intrinsic::x86_sse41_ptestc:
5438 case Intrinsic::x86_sse41_ptestz:
5439 case Intrinsic::x86_sse41_ptestnzc: {
5440 // The arguments for these intrinsics used to be v4f32, and changed
5441 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5442 // So, the only thing required is a bitcast for both arguments.
5443 // First, check the arguments have the old type.
5444 Value *Arg0 = CI->getArgOperand(0);
5445 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5446 return;
5447
5448 // Old intrinsic, add bitcasts
5449 Value *Arg1 = CI->getArgOperand(1);
5450
5451 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5452
5453 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5454 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5455
5456 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5457 break;
5458 }
5459
5460 case Intrinsic::x86_rdtscp: {
5461 // This used to take 1 arguments. If we have no arguments, it is already
5462 // upgraded.
5463 if (CI->getNumOperands() == 0)
5464 return;
5465
5466 NewCall = Builder.CreateCall(NewFn);
5467 // Extract the second result and store it.
5468 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5469 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5470 // Replace the original call result with the first result of the new call.
5471 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5472
5473 NewCall->takeName(CI);
5474 CI->replaceAllUsesWith(TSC);
5475 CI->eraseFromParent();
5476 return;
5477 }
5478
5479 case Intrinsic::x86_sse41_insertps:
5480 case Intrinsic::x86_sse41_dppd:
5481 case Intrinsic::x86_sse41_dpps:
5482 case Intrinsic::x86_sse41_mpsadbw:
5483 case Intrinsic::x86_avx_dp_ps_256:
5484 case Intrinsic::x86_avx2_mpsadbw: {
5485 // Need to truncate the last argument from i32 to i8 -- this argument models
5486 // an inherently 8-bit immediate operand to these x86 instructions.
5487 SmallVector<Value *, 4> Args(CI->args());
5488
5489 // Replace the last argument with a trunc.
5490 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5491 NewCall = Builder.CreateCall(NewFn, Args);
5492 break;
5493 }
5494
5495 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5496 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5497 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5498 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5499 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5500 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5501 SmallVector<Value *, 4> Args(CI->args());
5502 unsigned NumElts =
5503 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5504 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5505
5506 NewCall = Builder.CreateCall(NewFn, Args);
5507 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5508
5509 NewCall->takeName(CI);
5510 CI->replaceAllUsesWith(Res);
5511 CI->eraseFromParent();
5512 return;
5513 }
5514
5515 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5516 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5517 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5518 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5519 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5520 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5521 SmallVector<Value *, 4> Args(CI->args());
5522 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5523 if (NewFn->getIntrinsicID() ==
5524 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5525 Args[1] = Builder.CreateBitCast(
5526 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5527
5528 NewCall = Builder.CreateCall(NewFn, Args);
5529 Value *Res = Builder.CreateBitCast(
5530 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5531
5532 NewCall->takeName(CI);
5533 CI->replaceAllUsesWith(Res);
5534 CI->eraseFromParent();
5535 return;
5536 }
5537 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5538 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5539 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5540 SmallVector<Value *, 4> Args(CI->args());
5541 unsigned NumElts =
5542 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5543 Args[1] = Builder.CreateBitCast(
5544 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5545 Args[2] = Builder.CreateBitCast(
5546 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5547
5548 NewCall = Builder.CreateCall(NewFn, Args);
5549 break;
5550 }
5551
5552 case Intrinsic::thread_pointer: {
5553 NewCall = Builder.CreateCall(NewFn, {});
5554 break;
5555 }
5556
5557 case Intrinsic::memcpy:
5558 case Intrinsic::memmove:
5559 case Intrinsic::memset: {
5560 // We have to make sure that the call signature is what we're expecting.
5561 // We only want to change the old signatures by removing the alignment arg:
5562 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5563 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5564 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5565 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5566 // Note: i8*'s in the above can be any pointer type
5567 if (CI->arg_size() != 5) {
5568 DefaultCase();
5569 return;
5570 }
5571 // Remove alignment argument (3), and add alignment attributes to the
5572 // dest/src pointers.
5573 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5574 CI->getArgOperand(2), CI->getArgOperand(4)};
5575 NewCall = Builder.CreateCall(NewFn, Args);
5576 AttributeList OldAttrs = CI->getAttributes();
5577 AttributeList NewAttrs = AttributeList::get(
5578 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5579 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5580 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5581 NewCall->setAttributes(NewAttrs);
5582 auto *MemCI = cast<MemIntrinsic>(NewCall);
5583 // All mem intrinsics support dest alignment.
5585 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5586 // Memcpy/Memmove also support source alignment.
5587 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5588 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5589 break;
5590 }
5591
5592 case Intrinsic::masked_load:
5593 case Intrinsic::masked_gather:
5594 case Intrinsic::masked_store:
5595 case Intrinsic::masked_scatter: {
5596 if (CI->arg_size() != 4) {
5597 DefaultCase();
5598 return;
5599 }
5600
5601 auto GetMaybeAlign = [](Value *Op) {
5602 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5603 uint64_t Val = CI->getZExtValue();
5604 if (Val == 0)
5605 return MaybeAlign();
5606 if (isPowerOf2_64(Val))
5607 return MaybeAlign(Val);
5608 }
5609 reportFatalUsageError("Invalid alignment argument");
5610 };
5611 auto GetAlign = [&](Value *Op) {
5612 MaybeAlign Align = GetMaybeAlign(Op);
5613 if (Align)
5614 return *Align;
5615 reportFatalUsageError("Invalid zero alignment argument");
5616 };
5617
5618 const DataLayout &DL = CI->getDataLayout();
5619 switch (NewFn->getIntrinsicID()) {
5620 case Intrinsic::masked_load:
5621 NewCall = Builder.CreateMaskedLoad(
5622 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5623 CI->getArgOperand(2), CI->getArgOperand(3));
5624 break;
5625 case Intrinsic::masked_gather:
5626 NewCall = Builder.CreateMaskedGather(
5627 CI->getType(), CI->getArgOperand(0),
5628 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5629 CI->getType()->getScalarType()),
5630 CI->getArgOperand(2), CI->getArgOperand(3));
5631 break;
5632 case Intrinsic::masked_store:
5633 NewCall = Builder.CreateMaskedStore(
5634 CI->getArgOperand(0), CI->getArgOperand(1),
5635 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5636 break;
5637 case Intrinsic::masked_scatter:
5638 NewCall = Builder.CreateMaskedScatter(
5639 CI->getArgOperand(0), CI->getArgOperand(1),
5640 DL.getValueOrABITypeAlignment(
5641 GetMaybeAlign(CI->getArgOperand(2)),
5642 CI->getArgOperand(0)->getType()->getScalarType()),
5643 CI->getArgOperand(3));
5644 break;
5645 default:
5646 llvm_unreachable("Unexpected intrinsic ID");
5647 }
5648 // Previous metadata is still valid.
5649 NewCall->copyMetadata(*CI);
5650 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5651 break;
5652 }
5653
5654 case Intrinsic::lifetime_start:
5655 case Intrinsic::lifetime_end: {
5656 if (CI->arg_size() != 2) {
5657 DefaultCase();
5658 return;
5659 }
5660
5661 Value *Ptr = CI->getArgOperand(1);
5662 // Try to strip pointer casts, such that the lifetime works on an alloca.
5663 Ptr = Ptr->stripPointerCasts();
5664 if (isa<AllocaInst>(Ptr)) {
5665 // Don't use NewFn, as we might have looked through an addrspacecast.
5666 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5667 NewCall = Builder.CreateLifetimeStart(Ptr);
5668 else
5669 NewCall = Builder.CreateLifetimeEnd(Ptr);
5670 break;
5671 }
5672
5673 // Otherwise remove the lifetime marker.
5674 CI->eraseFromParent();
5675 return;
5676 }
5677
5678 case Intrinsic::x86_avx512_vpdpbusd_128:
5679 case Intrinsic::x86_avx512_vpdpbusd_256:
5680 case Intrinsic::x86_avx512_vpdpbusd_512:
5681 case Intrinsic::x86_avx512_vpdpbusds_128:
5682 case Intrinsic::x86_avx512_vpdpbusds_256:
5683 case Intrinsic::x86_avx512_vpdpbusds_512:
5684 case Intrinsic::x86_avx2_vpdpbssd_128:
5685 case Intrinsic::x86_avx2_vpdpbssd_256:
5686 case Intrinsic::x86_avx10_vpdpbssd_512:
5687 case Intrinsic::x86_avx2_vpdpbssds_128:
5688 case Intrinsic::x86_avx2_vpdpbssds_256:
5689 case Intrinsic::x86_avx10_vpdpbssds_512:
5690 case Intrinsic::x86_avx2_vpdpbsud_128:
5691 case Intrinsic::x86_avx2_vpdpbsud_256:
5692 case Intrinsic::x86_avx10_vpdpbsud_512:
5693 case Intrinsic::x86_avx2_vpdpbsuds_128:
5694 case Intrinsic::x86_avx2_vpdpbsuds_256:
5695 case Intrinsic::x86_avx10_vpdpbsuds_512:
5696 case Intrinsic::x86_avx2_vpdpbuud_128:
5697 case Intrinsic::x86_avx2_vpdpbuud_256:
5698 case Intrinsic::x86_avx10_vpdpbuud_512:
5699 case Intrinsic::x86_avx2_vpdpbuuds_128:
5700 case Intrinsic::x86_avx2_vpdpbuuds_256:
5701 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5702 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5703 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5704 CI->getArgOperand(2)};
5705 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5706 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5707 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5708
5709 NewCall = Builder.CreateCall(NewFn, Args);
5710 break;
5711 }
5712 case Intrinsic::x86_avx512_vpdpwssd_128:
5713 case Intrinsic::x86_avx512_vpdpwssd_256:
5714 case Intrinsic::x86_avx512_vpdpwssd_512:
5715 case Intrinsic::x86_avx512_vpdpwssds_128:
5716 case Intrinsic::x86_avx512_vpdpwssds_256:
5717 case Intrinsic::x86_avx512_vpdpwssds_512:
5718 case Intrinsic::x86_avx2_vpdpwsud_128:
5719 case Intrinsic::x86_avx2_vpdpwsud_256:
5720 case Intrinsic::x86_avx10_vpdpwsud_512:
5721 case Intrinsic::x86_avx2_vpdpwsuds_128:
5722 case Intrinsic::x86_avx2_vpdpwsuds_256:
5723 case Intrinsic::x86_avx10_vpdpwsuds_512:
5724 case Intrinsic::x86_avx2_vpdpwusd_128:
5725 case Intrinsic::x86_avx2_vpdpwusd_256:
5726 case Intrinsic::x86_avx10_vpdpwusd_512:
5727 case Intrinsic::x86_avx2_vpdpwusds_128:
5728 case Intrinsic::x86_avx2_vpdpwusds_256:
5729 case Intrinsic::x86_avx10_vpdpwusds_512:
5730 case Intrinsic::x86_avx2_vpdpwuud_128:
5731 case Intrinsic::x86_avx2_vpdpwuud_256:
5732 case Intrinsic::x86_avx10_vpdpwuud_512:
5733 case Intrinsic::x86_avx2_vpdpwuuds_128:
5734 case Intrinsic::x86_avx2_vpdpwuuds_256:
5735 case Intrinsic::x86_avx10_vpdpwuuds_512:
5736 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5737 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5738 CI->getArgOperand(2)};
5739 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5740 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5741 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5742
5743 NewCall = Builder.CreateCall(NewFn, Args);
5744 break;
5745 }
5746 assert(NewCall && "Should have either set this variable or returned through "
5747 "the default case");
5748 NewCall->takeName(CI);
5749 CI->replaceAllUsesWith(NewCall);
5750 CI->eraseFromParent();
5751}
5752
5754 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5755
5756 // Check if this function should be upgraded and get the replacement function
5757 // if there is one.
5758 Function *NewFn;
5759 if (UpgradeIntrinsicFunction(F, NewFn)) {
5760 // Replace all users of the old function with the new function or new
5761 // instructions. This is not a range loop because the call is deleted.
5762 for (User *U : make_early_inc_range(F->users()))
5763 if (CallBase *CB = dyn_cast<CallBase>(U))
5764 UpgradeIntrinsicCall(CB, NewFn);
5765
5766 // Remove old function, no longer used, from the module.
5767 if (F != NewFn)
5768 F->eraseFromParent();
5769 }
5770}
5771
5773 const unsigned NumOperands = MD.getNumOperands();
5774 if (NumOperands == 0)
5775 return &MD; // Invalid, punt to a verifier error.
5776
5777 // Check if the tag uses struct-path aware TBAA format.
5778 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5779 return &MD;
5780
5781 auto &Context = MD.getContext();
5782 if (NumOperands == 3) {
5783 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5784 MDNode *ScalarType = MDNode::get(Context, Elts);
5785 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5786 Metadata *Elts2[] = {ScalarType, ScalarType,
5789 MD.getOperand(2)};
5790 return MDNode::get(Context, Elts2);
5791 }
5792 // Create a MDNode <MD, MD, offset 0>
5794 Type::getInt64Ty(Context)))};
5795 return MDNode::get(Context, Elts);
5796}
5797
// Rewrite an (illegal) bitcast between pointers in different address spaces
// as a ptrtoint/inttoptr pair. On success, `Temp` receives the intermediate
// ptrtoint instruction (caller must insert it) and the inttoptr is returned.
// Returns nullptr when no upgrade applies.
// NOTE(review): the first signature line is elided — presumably
// `Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,`.
5799 Instruction *&Temp) {
5800 if (Opc != Instruction::BitCast)
5801 return nullptr;
5802
5803 Temp = nullptr;
5804 Type *SrcTy = V->getType();
// Only address-space-changing pointer bitcasts need this rewrite; same-AS
// pointer bitcasts are legal as-is.
5805 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5806 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5807 LLVMContext &Context = V->getContext();
5808
5809 // We have no information about target data layout, so we assume that
5810 // the maximum pointer size is 64bit.
5811 Type *MidTy = Type::getInt64Ty(Context);
5812 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5813
5814 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5815 }
5816
5817 return nullptr;
5818}
5819
// Constant-expression counterpart of UpgradeBitCastInst: turn an illegal
// address-space-changing pointer bitcast constant into a
// ptrtoint/inttoptr constant-expression pair. Returns nullptr when no
// upgrade applies.
// NOTE(review): the signature line is elided — presumably
// `Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)`.
5821 if (Opc != Instruction::BitCast)
5822 return nullptr;
5823
5824 Type *SrcTy = C->getType();
5825 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5826 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5827 LLVMContext &Context = C->getContext();
5828
5829 // We have no information about target data layout, so we assume that
5830 // the maximum pointer size is 64bit.
5831 Type *MidTy = Type::getInt64Ty(Context);
5832
// (The getIntToPtr/getPtrToInt constant-expression line preceding this
// continuation is elided in this view.)
5834 DestTy);
5835 }
5836
5837 return nullptr;
5838}
5839
5840/// Check the debug info version number, if it is out-dated, drop the debug
5841/// info. Return true if module is modified.
// NOTE(review): the signature and the early-exit guard (likely testing
// DisableAutoUpgradeDebugInfo) are elided in this view — presumably
// `bool llvm::UpgradeDebugInfo(Module &M)`.
5844 return false;
5845
5846 llvm::TimeTraceScope timeScope("Upgrade debug info");
5847 // We need to get metadata before the module is verified (i.e., getModuleFlag
5848 // makes assumptions that we haven't verified yet). Carefully extract the flag
5849 // from the metadata.
5850 unsigned Version = 0;
5851 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
// Find the "Debug Info Version" module flag by hand; module-flag nodes are
// !{behavior, !"key", value} triples.
5852 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5853 if (Flag->getNumOperands() < 3)
5854 return false;
5855 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5856 return K->getString() == "Debug Info Version";
5857 return false;
5858 });
5859 if (OpIt != ModFlags->op_end()) {
5860 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5861 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5862 Version = CI->getZExtValue();
5863 }
5864 }
5865
// (The branch condition comparing Version against the current debug metadata
// version is elided in this view.)
5867 bool BrokenDebugInfo = false;
5868 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5869 report_fatal_error("Broken module found, compilation aborted!");
5870 if (!BrokenDebugInfo)
5871 // Everything is ok.
5872 return false;
5873 else {
5874 // Diagnose malformed debug info.
// (Construction of the diagnostic object is elided in this view.)
5876 M.getContext().diagnose(Diag);
5877 }
5878 }
// Out-of-date or broken debug info: strip it rather than keep invalid data.
5879 bool Modified = StripDebugInfo(M);
5881 // Diagnose a version mismatch.
5883 M.getContext().diagnose(DiagVersion);
5884 }
5885 return Modified;
5886}
5887
5888static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5889 GlobalValue *GV, const Metadata *V) {
5890 Function *F = cast<Function>(GV);
5891
5892 constexpr StringLiteral DefaultValue = "1";
5893 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5894 unsigned Length = 0;
5895
5896 if (F->hasFnAttribute(Attr)) {
5897 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5898 // parse these elements placing them into Vect3
5899 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5900 for (; Length < 3 && !S.empty(); Length++) {
5901 auto [Part, Rest] = S.split(',');
5902 Vect3[Length] = Part.trim();
5903 S = Rest;
5904 }
5905 }
5906
5907 const unsigned Dim = DimC - 'x';
5908 assert(Dim < 3 && "Unexpected dim char");
5909
5910 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5911
5912 // local variable required for StringRef in Vect3 to point to.
5913 const std::string VStr = llvm::utostr(VInt);
5914 Vect3[Dim] = VStr;
5915 Length = std::max(Length, Dim + 1);
5916
5917 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5918 F->addFnAttr(Attr, NewAttr);
5919}
5920
5921static inline bool isXYZ(StringRef S) {
5922 return S == "x" || S == "y" || S == "z";
5923}
5924
// Apply a single (key, value) pair from a !nvvm.annotations entry to \p GV
// as an attribute / calling convention. Returns true when the pair was
// consumed (and thus should be dropped from the metadata).
// NOTE(review): the first signature line is elided — presumably
// `static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,`.
5926 const Metadata *V) {
5927 if (K == "kernel") {
// (A check/assert line is elided here in this view.)
5929 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5930 return true;
5931 }
5932 if (K == "align") {
5933 // V is a bitfield specifying two 16-bit values. The alignment value is
5934 // specified in low 16-bits, The index is specified in the high bits. For the
5935 // index, 0 indicates the return value while higher values correspond to
5936 // each parameter (idx = param + 1).
5937 const uint64_t AlignIdxValuePair =
5938 mdconst::extract<ConstantInt>(V)->getZExtValue();
5939 const unsigned Idx = (AlignIdxValuePair >> 16);
5940 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5941 cast<Function>(GV)->addAttributeAtIndex(
5942 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5943 return true;
5944 }
5945 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5946 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
// (The addFnAttr call recording CV is elided in this view.)
5948 return true;
5949 }
5950 if (K == "minctasm") {
5951 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5952 cast<Function>(GV)->addFnAttr(NVVMAttr::MinCTASm, llvm::utostr(CV));
5953 return true;
5954 }
5955 if (K == "maxnreg") {
5956 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5957 cast<Function>(GV)->addFnAttr(NVVMAttr::MaxNReg, llvm::utostr(CV));
5958 return true;
5959 }
// consume_front strips the prefix so K is left holding just the dimension
// suffix checked by isXYZ; the vector-attr upgrade calls are elided below.
5960 if (K.consume_front("maxntid") && isXYZ(K)) {
5962 return true;
5963 }
5964 if (K.consume_front("reqntid") && isXYZ(K)) {
5966 return true;
5967 }
5968 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5970 return true;
5971 }
5972 if (K == "grid_constant") {
5973 const auto Attr = Attribute::get(GV->getContext(), NVVMAttr::GridConstant);
5974 for (const auto &Op : cast<MDNode>(V)->operands()) {
5975 // For some reason, the index is 1-based in the metadata. Good thing we're
5976 // able to auto-upgrade it!
5977 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5978 cast<Function>(GV)->addParamAttr(Index, Attr);
5979 }
5980 return true;
5981 }
5982
// Unknown key: leave the metadata pair in place for downstream consumers.
5983 return false;
5984}
5985
// Walk the module-level !nvvm.annotations named metadata, converting each
// recognized (key, value) property into the corresponding function attribute
// via upgradeSingleNVVMAnnotation, and rebuilding the metadata with only the
// unconsumed pairs.
// NOTE(review): the signature line is elided — presumably
// `void llvm::UpgradeNVVMAnnotations(Module &M)`; confirm against AutoUpgrade.h.
5987 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5988 if (!NamedMD)
5989 return;
5990
5991 SmallVector<MDNode *, 8> NewNodes;
// (The declaration of the SeenNodes set used below is elided in this view;
// it deduplicates repeated metadata nodes.)
5993 for (MDNode *MD : NamedMD->operands()) {
5994 if (!SeenNodes.insert(MD).second)
5995 continue;
5996
5997 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5998 if (!GV)
5999 continue;
6000
6001 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
6002
6003 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
6004 // Each nvvm.annotations metadata entry will be of the following form:
6005 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
6006 // start index = 1, to skip the global variable key
6007 // increment = 2, to skip the value for each property-value pairs
6008 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6009 MDString *K = cast<MDString>(MD->getOperand(j));
6010 const MDOperand &V = MD->getOperand(j + 1);
6011 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
6012 if (!Upgraded)
6013 NewOperands.append({K, V});
6014 }
6015
// Keep the node only when some key/value pair survived (size 1 means only
// the global operand is left).
6016 if (NewOperands.size() > 1)
6017 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
6018 }
6019
// Replace the old operand list wholesale with the rebuilt nodes.
6020 NamedMD->clearOperands();
6021 for (MDNode *N : NewNodes)
6022 NamedMD->addOperand(N);
6023}
6024
6025/// This checks for objc retain release marker which should be upgraded. It
6026/// returns true if module is modified.
// Moves the legacy "clang.arc.retainAutoreleasedReturnValueMarker" named
// metadata into a module flag, rewriting an old "a#b" value to "a;b".
// NOTE(review): the signature line is elided — presumably
// `static bool upgradeRetainReleaseMarker(Module &M)`.
6028 bool Changed = false;
6029 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
6030 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6031 if (ModRetainReleaseMarker) {
6032 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
6033 if (Op) {
6034 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
6035 if (ID) {
// Old markers separated the two components with '#'; the new module-flag
// form uses ';'.
6036 SmallVector<StringRef, 4> ValueComp;
6037 ID->getString().split(ValueComp, "#");
6038 if (ValueComp.size() == 2) {
6039 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
6040 ID = MDString::get(M.getContext(), NewValue);
6041 }
6042 M.addModuleFlag(Module::Error, MarkerKey, ID);
6043 M.eraseNamedMetadata(ModRetainReleaseMarker);
6044 Changed = true;
6045 }
6046 }
6047 }
6048 return Changed;
6049}
6050
// Convert calls to pre-intrinsic ObjC ARC runtime functions (plain functions
// named e.g. "objc_retain") into calls to the corresponding llvm.objc.*
// intrinsics, bitcasting arguments/results where needed, then delete the
// now-unused runtime declarations.
// NOTE(review): the signature line is elided — presumably
// `void llvm::UpgradeARCRuntime(Module &M)`; confirm against AutoUpgrade.h.
6052 // This lambda converts normal function calls to ARC runtime functions to
6053 // intrinsic calls.
6054 auto UpgradeToIntrinsic = [&](const char *OldFunc,
6055 llvm::Intrinsic::ID IntrinsicFunc) {
6056 Function *Fn = M.getFunction(OldFunc);
6057
6058 if (!Fn)
6059 return;
6060
6061 Function *NewFn =
6062 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
6063
// early-inc iteration because each upgraded call is erased inside the loop.
6064 for (User *U : make_early_inc_range(Fn->users())) {
// (The dyn_cast<CallInst> of U into CI is elided in this view.)
6066 if (!CI || CI->getCalledFunction() != Fn)
6067 continue;
6068
6069 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
6070 FunctionType *NewFuncTy = NewFn->getFunctionType();
// (The declaration of the Args vector used below is elided in this view.)
6072
6073 // Don't upgrade the intrinsic if it's not valid to bitcast the return
6074 // value to the return type of the old function.
6075 if (NewFuncTy->getReturnType() != CI->getType() &&
6076 !CastInst::castIsValid(Instruction::BitCast, CI,
6077 NewFuncTy->getReturnType()))
6078 continue;
6079
6080 bool InvalidCast = false;
6081
6082 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
6083 Value *Arg = CI->getArgOperand(I);
6084
6085 // Bitcast argument to the parameter type of the new function if it's
6086 // not a variadic argument.
6087 if (I < NewFuncTy->getNumParams()) {
6088 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
6089 // to the parameter type of the new function.
6090 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
6091 NewFuncTy->getParamType(I))) {
6092 InvalidCast = true;
6093 break;
6094 }
6095 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
6096 }
6097 Args.push_back(Arg);
6098 }
6099
6100 if (InvalidCast)
6101 continue;
6102
6103 // Create a call instruction that calls the new function.
6104 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6105 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
6106 NewCall->takeName(CI);
6107
6108 // Bitcast the return value back to the type of the old call.
6109 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
6110
6111 if (!CI->use_empty())
6112 CI->replaceAllUsesWith(NewRetVal);
6113 CI->eraseFromParent();
6114 }
6115
// Drop the old declaration only once every call site has been rewritten.
6116 if (Fn->use_empty())
6117 Fn->eraseFromParent();
6118 };
6119
6120 // Unconditionally convert a call to "clang.arc.use" to a call to
6121 // "llvm.objc.clang.arc.use".
6122 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6123
6124 // Upgrade the retain release marker. If there is no need to upgrade
6125 // the marker, that means either the module is already new enough to contain
6126 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
// (The call to upgradeRetainReleaseMarker guarding this early return is
// elided in this view.)
6128 return;
6129
// Table mapping legacy ARC runtime function names to their intrinsic IDs.
6130 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6131 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6132 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6133 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6134 {"objc_autoreleaseReturnValue",
6135 llvm::Intrinsic::objc_autoreleaseReturnValue},
6136 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6137 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6138 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6139 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6140 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6141 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6142 {"objc_release", llvm::Intrinsic::objc_release},
6143 {"objc_retain", llvm::Intrinsic::objc_retain},
6144 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6145 {"objc_retainAutoreleaseReturnValue",
6146 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6147 {"objc_retainAutoreleasedReturnValue",
6148 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6149 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6150 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6151 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6152 {"objc_unsafeClaimAutoreleasedReturnValue",
6153 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6154 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6155 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6156 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6157 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6158 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6159 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6160 {"objc_arc_annotation_topdown_bbstart",
6161 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6162 {"objc_arc_annotation_topdown_bbend",
6163 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6164 {"objc_arc_annotation_bottomup_bbstart",
6165 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6166 {"objc_arc_annotation_bottomup_bbend",
6167 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6168
6169 for (auto &I : RuntimeFuncs)
6170 UpgradeToIntrinsic(I.first, I.second);
6171}
6172
// Upgrade legacy module flags in place: fix flag behaviors (PIC/PIE level,
// branch protection), normalize the ObjC image-info section string, split the
// old combined "Objective-C Garbage Collection" value into GC + Swift version
// flags, and rename the AMDGPU code-object-version key. Returns true if the
// module was modified.
// NOTE(review): the signature line is elided — presumably
// `bool llvm::UpgradeModuleFlags(Module &M)`; confirm against AutoUpgrade.h.
6174 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6175 if (!ModFlags)
6176 return false;
6177
6178 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6179 bool HasSwiftVersionFlag = false;
6180 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6181 uint32_t SwiftABIVersion;
6182 auto Int8Ty = Type::getInt8Ty(M.getContext());
6183 auto Int32Ty = Type::getInt32Ty(M.getContext());
6184
6185 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6186 MDNode *Op = ModFlags->getOperand(I);
// Module flags are !{behavior, !"key", value} triples; skip anything else.
6187 if (Op->getNumOperands() != 3)
6188 continue;
6189 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6190 if (!ID)
6191 continue;
// Helper: rewrite the current flag's behavior operand to B, keeping key and
// value unchanged.
6192 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6193 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6194 Type::getInt32Ty(M.getContext()), B)),
6195 MDString::get(M.getContext(), ID->getString()),
6196 Op->getOperand(2)};
6197 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6198 Changed = true;
6199 };
6200
6201 if (ID->getString() == "Objective-C Image Info Version")
6202 HasObjCFlag = true;
6203 if (ID->getString() == "Objective-C Class Properties")
6204 HasClassProperties = true;
6205 // Upgrade PIC from Error/Max to Min.
6206 if (ID->getString() == "PIC Level") {
// (Extraction of the behavior constant from operand 0 is elided here.)
6207 if (auto *Behavior =
6209 uint64_t V = Behavior->getLimitedValue();
6210 if (V == Module::Error || V == Module::Max)
6211 SetBehavior(Module::Min);
6212 }
6213 }
6214 // Upgrade "PIE Level" from Error to Max.
6215 if (ID->getString() == "PIE Level")
6216 if (auto *Behavior =
6218 if (Behavior->getLimitedValue() == Module::Error)
6219 SetBehavior(Module::Max);
6220
6221 // Upgrade branch protection and return address signing module flags. The
6222 // module flag behavior for these fields were Error and now they are Min.
6223 if (ID->getString() == "branch-target-enforcement" ||
6224 ID->getString().starts_with("sign-return-address")) {
6225 if (auto *Behavior =
6227 if (Behavior->getLimitedValue() == Module::Error) {
6228 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6229 Metadata *Ops[3] = {
6230 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6231 Op->getOperand(1), Op->getOperand(2)};
6232 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6233 Changed = true;
6234 }
6235 }
6236 }
6237
6238 // Upgrade Objective-C Image Info Section. Removed the whitespce in the
6239 // section name so that llvm-lto will not complain about mismatching
6240 // module flags that is functionally the same.
6241 if (ID->getString() == "Objective-C Image Info Section") {
6242 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6243 SmallVector<StringRef, 4> ValueComp;
6244 Value->getString().split(ValueComp, " ");
6245 if (ValueComp.size() != 1) {
6246 std::string NewValue;
6247 for (auto &S : ValueComp)
6248 NewValue += S.str();
6249 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6250 MDString::get(M.getContext(), NewValue)};
6251 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6252 Changed = true;
6253 }
6254 }
6255 }
6256
6257 // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
6258 // If the higher bits are set, it adds new module flag for swift info.
6259 if (ID->getString() == "Objective-C Garbage Collection") {
6260 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6261 if (Md) {
6262 assert(Md->getValue() && "Expected non-empty metadata");
6263 auto Type = Md->getValue()->getType();
// Already the new i8 form: nothing to do for this flag.
6264 if (Type == Int8Ty)
6265 continue;
6266 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
// Bits above the low byte encode the Swift ABI/major/minor versions.
6267 if ((Val & 0xff) != Val) {
6268 HasSwiftVersionFlag = true;
6269 SwiftABIVersion = (Val & 0xff00) >> 8;
6270 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6271 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6272 }
// Rewrite the flag value as i8 with only the GC bits kept. (The behavior
// operand line between these two is elided in this view.)
6273 Metadata *Ops[3] = {
6275 Op->getOperand(1),
6276 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
6277 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6278 Changed = true;
6279 }
6280 }
6281
6282 if (ID->getString() == "amdgpu_code_object_version") {
6283 Metadata *Ops[3] = {
6284 Op->getOperand(0),
6285 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6286 Op->getOperand(2)};
6287 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6288 Changed = true;
6289 }
6290 }
6291
6292 // "Objective-C Class Properties" is recently added for Objective-C. We
6293 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
6294 // flag of value 0, so we can correclty downgrade this flag when trying to
6295 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6296 // this module flag.
6297 if (HasObjCFlag && !HasClassProperties) {
6298 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6299 (uint32_t)0);
6300 Changed = true;
6301 }
6302
6303 if (HasSwiftVersionFlag) {
6304 M.addModuleFlag(Module::Error, "Swift ABI Version",
6305 SwiftABIVersion);
6306 M.addModuleFlag(Module::Error, "Swift Major Version",
6307 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6308 M.addModuleFlag(Module::Error, "Swift Minor Version",
6309 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6310 Changed = true;
6311 }
6312
6313 return Changed;
6314}
6315
// Normalize "__DATA, __objc_catlist"-style section names on globals by
// removing spaces after the commas, so old and new modules agree on the
// section string.
// NOTE(review): the signature line is elided — presumably
// `void llvm::UpgradeSectionAttributes(Module &M)`; confirm against
// AutoUpgrade.h.
6317 auto TrimSpaces = [](StringRef Section) -> std::string {
6318 SmallVector<StringRef, 5> Components;
6319 Section.split(Components, ',');
6320
6321 SmallString<32> Buffer;
6322 raw_svector_ostream OS(Buffer);
6323
// Emit ",component" for each trimmed piece, then drop the leading comma.
6324 for (auto Component : Components)
6325 OS << ',' << Component.trim();
6326
6327 return std::string(OS.str().substr(1));
6328 };
6329
6330 for (auto &GV : M.globals()) {
6331 if (!GV.hasSection())
6332 continue;
6333
6334 StringRef Section = GV.getSection();
6335
6336 if (!Section.starts_with("__DATA, __objc_catlist"))
6337 continue;
6338
6339 // __DATA, __objc_catlist, regular, no_dead_strip
6340 // __DATA,__objc_catlist,regular,no_dead_strip
6341 GV.setSection(TrimSpaces(Section));
6342 }
6343}
6344
namespace {
6346// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6347// callsites within a function that did not also have the strictfp attribute.
6348// Since 10.0, if strict FP semantics are needed within a function, the
6349// function must have the strictfp attribute and all calls within the function
6350// must also have the strictfp attribute. This latter restriction is
6351// necessary to prevent unwanted libcall simplification when a function is
6352// being cloned (such as for inlining).
6353//
6354// The "dangling" strictfp attribute usage was only used to prevent constant
6355// folding and other libcall simplification. The nobuiltin attribute on the
6356// callsite has the same effect.
6357struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6358 StrictFPUpgradeVisitor() = default;
6359
// Replace a dangling callsite strictfp attribute with nobuiltin.
6360 void visitCallBase(CallBase &Call) {
6361 if (!Call.isStrictFP())
6362 return;
// (The guard checking whether the enclosing function already has strictfp
// is elided in this view.)
6364 return;
6365 // If we get here, the caller doesn't have the strictfp attribute
6366 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6367 Call.removeFnAttr(Attribute::StrictFP);
6368 Call.addFnAttr(Attribute::NoBuiltin);
6369 }
6370};
6371
6372/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6373struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6374 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6375 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6376
// Tag every floating-point atomicrmw with the three metadata kinds that
// together express the old "unsafe fp atomics" permission.
6377 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6378 if (!RMW.isFloatingPointOperation())
6379 return;
6380
6381 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6382 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6383 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6384 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6385 }
6386};
6387} // namespace
6388
// Per-function attribute upgrades: dangling strictfp -> nobuiltin, removal
// of type-incompatible attributes, conversion of legacy string attributes
// ("implicit-section-name", "nooutline", "uniform-work-group-size",
// "amdgpu-unsafe-fp-atomics", denormal-mode strings) into their modern forms.
// NOTE(review): the signature line is elided — presumably
// `void llvm::UpgradeFunctionAttributes(Function &F)`; confirm against
// AutoUpgrade.h.
6390 // If a function definition doesn't have the strictfp attribute,
6391 // convert any callsite strictfp attributes to nobuiltin.
6392 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6393 StrictFPUpgradeVisitor SFPV;
6394 SFPV.visit(F);
6395 }
6396
6397 // Remove all incompatibile attributes from function.
6398 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6399 F.getReturnType(), F.getAttributes().getRetAttrs()));
6400 for (auto &Arg : F.args())
6401 Arg.removeAttrs(
6402 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6403
// Batch attribute additions/removals and apply them once at the end.
6404 bool AddingAttrs = false, RemovingAttrs = false;
6405 AttrBuilder AttrsToAdd(F.getContext());
6406 AttributeMask AttrsToRemove;
6407
6408 // Older versions of LLVM treated an "implicit-section-name" attribute
6409 // similarly to directly setting the section on a Function.
6410 if (Attribute A = F.getFnAttribute("implicit-section-name");
6411 A.isValid() && A.isStringAttribute()) {
6412 F.setSection(A.getValueAsString());
6413 AttrsToRemove.addAttribute("implicit-section-name");
6414 RemovingAttrs = true;
6415 }
6416
// String "nooutline" becomes the enum NoOutline attribute.
6417 if (Attribute A = F.getFnAttribute("nooutline");
6418 A.isValid() && A.isStringAttribute()) {
6419 AttrsToRemove.addAttribute("nooutline");
6420 AttrsToAdd.addAttribute(Attribute::NoOutline);
6421 AddingAttrs = RemovingAttrs = true;
6422 }
6423
// "uniform-work-group-size"="true" becomes the valueless form; "false" (or
// any other value) is simply dropped.
6424 if (Attribute A = F.getFnAttribute("uniform-work-group-size");
6425 A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
6426 AttrsToRemove.addAttribute("uniform-work-group-size");
6427 RemovingAttrs = true;
6428 if (A.getValueAsString() == "true") {
6429 AttrsToAdd.addAttribute("uniform-work-group-size");
6430 AddingAttrs = true;
6431 }
6432 }
6433
6434 if (!F.empty()) {
6435 // For some reason this is called twice, and the first time is before any
6436 // instructions are loaded into the body.
6437
6438 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6439 A.isValid()) {
6440
6441 if (A.getValueAsBool()) {
6442 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6443 Visitor.visit(F);
6444 }
6445
6446 // We will leave behind dead attribute uses on external declarations, but
6447 // clang never added these to declarations anyway.
6448 AttrsToRemove.addAttribute("amdgpu-unsafe-fp-atomics");
6449 RemovingAttrs = true;
6450 }
6451 }
6452
// Translate the string denormal-mode attributes into the structured
// denormal-FP-environment attribute when they parse successfully.
6453 DenormalMode DenormalFPMath = DenormalMode::getIEEE();
6454 DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();
6455
6456 bool HandleDenormalMode = false;
6457
6458 if (Attribute Attr = F.getFnAttribute("denormal-fp-math"); Attr.isValid()) {
6459 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6460 if (ParsedMode.isValid()) {
6461 DenormalFPMath = ParsedMode;
6462 AttrsToRemove.addAttribute("denormal-fp-math");
6463 AddingAttrs = RemovingAttrs = true;
6464 HandleDenormalMode = true;
6465 }
6466 }
6467
6468 if (Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
6469 Attr.isValid()) {
6470 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6471 if (ParsedMode.isValid()) {
6472 DenormalFPMathF32 = ParsedMode;
6473 AttrsToRemove.addAttribute("denormal-fp-math-f32");
6474 AddingAttrs = RemovingAttrs = true;
6475 HandleDenormalMode = true;
6476 }
6477 }
6478
6479 if (HandleDenormalMode)
6480 AttrsToAdd.addDenormalFPEnvAttr(
6481 DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));
6482
6483 if (RemovingAttrs)
6484 F.removeFnAttrs(AttrsToRemove);
6485
6486 if (AddingAttrs)
6487 F.addFnAttrs(AttrsToAdd);
6488}
6489
6490// Check if the function attribute is not present and set it.
// Never overwrites an existing attribute value.
// NOTE(review): the first signature line is elided — presumably
// `static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,`.
6492 StringRef Value) {
6493 if (!F.hasFnAttribute(FnAttrName))
6494 F.addFnAttr(FnAttrName, Value);
6495}
6496
6497// Check if the function attribute is not present and set it if needed.
6498// If the attribute is "false" then removes it.
6499// If the attribute is "true" resets it to a valueless attribute.
6500static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6501 if (!F.hasFnAttribute(FnAttrName)) {
6502 if (Set)
6503 F.addFnAttr(FnAttrName);
6504 } else {
6505 auto A = F.getFnAttribute(FnAttrName);
6506 if ("false" == A.getValueAsString())
6507 F.removeFnAttr(FnAttrName);
6508 else if ("true" == A.getValueAsString()) {
6509 F.removeFnAttr(FnAttrName);
6510 F.addFnAttr(FnAttrName);
6511 }
6512 }
6513}
6514
// Propagate ARM/AArch64 branch-protection module flags (BTI, PAuth-LR, GCS,
// return-address signing) down onto each function definition as function
// attributes, then bump the module flags to the new encoding (value 2 marks
// an already-upgraded module and causes an early exit).
// NOTE(review): the signature line is elided — name and exact parameters not
// visible here; confirm against AutoUpgrade.h.
6516 Triple T(M.getTargetTriple());
6517 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6518 return;
6519
6520 uint64_t BTEValue = 0;
6521 uint64_t BPPLRValue = 0;
6522 uint64_t GCSValue = 0;
6523 uint64_t SRAValue = 0;
6524 uint64_t SRAALLValue = 0;
6525 uint64_t SRABKeyValue = 0;
6526
6527 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6528 if (ModFlags) {
6529 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6530 MDNode *Op = ModFlags->getOperand(I);
6531 if (Op->getNumOperands() != 3)
6532 continue;
6533
6534 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6535 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6536 if (!ID || !CI)
6537 continue;
6538
6539 StringRef IDStr = ID->getString();
6540 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6541 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6542 : IDStr == "guarded-control-stack" ? &GCSValue
6543 : IDStr == "sign-return-address" ? &SRAValue
6544 : IDStr == "sign-return-address-all" ? &SRAALLValue
6545 : IDStr == "sign-return-address-with-bkey"
6546 ? &SRABKeyValue
6547 : nullptr;
6548 if (!ValPtr)
6549 continue;
6550
6551 *ValPtr = CI->getZExtValue();
// Value 2 is the post-upgrade encoding: nothing left to do.
6552 if (*ValPtr == 2)
6553 return;
6554 }
6555 }
6556
6557 bool BTE = BTEValue == 1;
6558 bool BPPLR = BPPLRValue == 1;
6559 bool GCS = GCSValue == 1;
6560 bool SRA = SRAValue == 1;
6561
6562 StringRef SignTypeValue = "non-leaf";
6563 if (SRA && SRAALLValue == 1)
6564 SignTypeValue = "all";
6565
6566 StringRef SignKeyValue = "a_key";
6567 if (SRA && SRABKeyValue == 1)
6568 SignKeyValue = "b_key";
6569
6570 for (Function &F : M.getFunctionList()) {
6571 if (F.isDeclaration())
6572 continue;
6573
6574 if (SRA) {
6575 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6576 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6577 } else {
// Module says "no signing": clear an explicit "none" left on the function
// along with its now-meaningless key attribute.
6578 if (auto A = F.getFnAttribute("sign-return-address");
6579 A.isValid() && "none" == A.getValueAsString()) {
6580 F.removeFnAttr("sign-return-address");
6581 F.removeFnAttr("sign-return-address-key");
6582 }
6583 }
6584 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6585 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6586 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6587 }
6588
// Record the upgrade by setting the flags to 2 (see early-return above).
6589 if (BTE)
6590 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6591 if (BPPLR)
6592 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6593 if (GCS)
6594 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6595 if (SRA) {
6596 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6597 if (SRAALLValue == 1)
6598 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6599 if (SRABKeyValue == 1)
6600 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6601 }
6602}
6603
6604static bool isOldLoopArgument(Metadata *MD) {
6605 auto *T = dyn_cast_or_null<MDTuple>(MD);
6606 if (!T)
6607 return false;
6608 if (T->getNumOperands() < 1)
6609 return false;
6610 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6611 if (!S)
6612 return false;
6613 return S->getString().starts_with("llvm.vectorizer.");
6614}
6615
// Map a legacy "llvm.vectorizer.*" loop-metadata tag to its modern
// "llvm.loop.*" equivalent ("unroll" is special-cased to the interleave
// count; everything else keeps its suffix under "llvm.loop.vectorize.").
// NOTE(review): the signature line is elided — presumably
// `static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag)`.
6617 StringRef OldPrefix = "llvm.vectorizer.";
6618 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6619
6620 if (OldTag == "llvm.vectorizer.unroll")
6621 return MDString::get(C, "llvm.loop.interleave.count");
6622
6623 return MDString::get(
6624 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6625 .str());
6626}
6627
// Upgrade one operand of a loop-metadata node: if it is a tuple tagged with
// the legacy "llvm.vectorizer." prefix, rebuild it with the modern tag;
// otherwise return it unchanged.
// NOTE(review): the signature line is elided — presumably
// `static Metadata *upgradeLoopArgument(Metadata *MD)`.
6629 auto *T = dyn_cast_or_null<MDTuple>(MD);
6630 if (!T)
6631 return MD;
6632 if (T->getNumOperands() < 1)
6633 return MD;
6634 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6635 if (!OldTag)
6636 return MD;
6637 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6638 return MD;
6639
6640 // This has an old tag. Upgrade it.
// (The declaration of the Ops vector is elided in this view.)
6642 Ops.reserve(T->getNumOperands());
6643 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
// Remaining operands are carried over unchanged.
6644 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6645 Ops.push_back(T->getOperand(I));
6646
6647 return MDTuple::get(T->getContext(), Ops);
6648}
6649
// Upgrade a whole loop-metadata node, replacing any legacy
// "llvm.vectorizer.*" arguments; returns the original node untouched when no
// legacy arguments are present.
// NOTE(review): the signature line is elided — presumably
// `MDNode *llvm::upgradeInstructionLoopAttachment`-style, taking `MDNode &N`;
// confirm against AutoUpgrade.h.
6651 auto *T = dyn_cast<MDTuple>(&N);
6652 if (!T)
6653 return &N;
6654
// Fast path: nothing old to rewrite, keep node identity stable.
6655 if (none_of(T->operands(), isOldLoopArgument))
6656 return &N;
6657
// (The declaration of the Ops vector is elided in this view.)
6659 Ops.reserve(T->getNumOperands());
6660 for (Metadata *MD : T->operands())
6661 Ops.push_back(upgradeLoopArgument(MD));
6662
6663 return MDTuple::get(T->getContext(), Ops);
6664}
6665
6667 Triple T(TT);
6668 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6669 // the address space of globals to 1. This does not apply to SPIRV Logical.
6670 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6671 !DL.contains("-G") && !DL.starts_with("G")) {
6672 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6673 }
6674
6675 if (T.isLoongArch64() || T.isRISCV64()) {
6676 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6677 auto I = DL.find("-n64-");
6678 if (I != StringRef::npos)
6679 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6680 return DL.str();
6681 }
6682
6683 // AMDGPU data layout upgrades.
6684 std::string Res = DL.str();
6685 if (T.isAMDGPU()) {
6686 // Define address spaces for constants.
6687 if (!DL.contains("-G") && !DL.starts_with("G"))
6688 Res.append(Res.empty() ? "G1" : "-G1");
6689
6690 // AMDGCN data layout upgrades.
6691 if (T.isAMDGCN()) {
6692
6693 // Add missing non-integral declarations.
6694 // This goes before adding new address spaces to prevent incoherent string
6695 // values.
6696 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6697 Res.append("-ni:7:8:9");
6698 // Update ni:7 to ni:7:8:9.
6699 if (DL.ends_with("ni:7"))
6700 Res.append(":8:9");
6701 if (DL.ends_with("ni:7:8"))
6702 Res.append(":9");
6703
6704 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6705 // resources) An empty data layout has already been upgraded to G1 by now.
6706 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6707 Res.append("-p7:160:256:256:32");
6708 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6709 Res.append("-p8:128:128:128:48");
6710 constexpr StringRef OldP8("-p8:128:128-");
6711 if (DL.contains(OldP8))
6712 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6713 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6714 Res.append("-p9:192:256:256:32");
6715 }
6716
6717 // Upgrade the ELF mangling mode.
6718 if (!DL.contains("m:e"))
6719 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6720
6721 return Res;
6722 }
6723
6724 if (T.isSystemZ() && !DL.empty()) {
6725 // Make sure the stack alignment is present.
6726 if (!DL.contains("-S64"))
6727 return "E-S64" + DL.drop_front(1).str();
6728 return DL.str();
6729 }
6730
6731 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6732 // If the datalayout matches the expected format, add pointer size address
6733 // spaces to the datalayout.
6734 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6735 if (!DL.contains(AddrSpaces)) {
6737 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6738 if (R.match(Res, &Groups))
6739 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6740 }
6741 };
6742
6743 // AArch64 data layout upgrades.
6744 if (T.isAArch64()) {
6745 // Add "-Fn32"
6746 if (!DL.empty() && !DL.contains("-Fn32"))
6747 Res.append("-Fn32");
6748 AddPtr32Ptr64AddrSpaces();
6749 return Res;
6750 }
6751
6752 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6753 T.isWasm()) {
6754 // Mips64 with o32 ABI did not add "-i128:128".
6755 // Add "-i128:128"
6756 std::string I64 = "-i64:64";
6757 std::string I128 = "-i128:128";
6758 if (!StringRef(Res).contains(I128)) {
6759 size_t Pos = Res.find(I64);
6760 if (Pos != size_t(-1))
6761 Res.insert(Pos + I64.size(), I128);
6762 }
6763 }
6764
6765 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6766 size_t Pos = Res.find("-S128");
6767 if (Pos == StringRef::npos)
6768 Pos = Res.size();
6769 Res.insert(Pos, "-f64:32:64");
6770 }
6771
6772 if (!T.isX86())
6773 return Res;
6774
6775 AddPtr32Ptr64AddrSpaces();
6776
6777 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6778 // for i128 operations prior to this being reflected in the data layout, and
6779 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6780 // boundaries, so although this is a breaking change, the upgrade is expected
6781 // to fix more IR than it breaks.
6782 // Intel MCU is an exception and uses 4-byte-alignment.
6783 if (!T.isOSIAMCU()) {
6784 std::string I128 = "-i128:128";
6785 if (StringRef Ref = Res; !Ref.contains(I128)) {
6787 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6788 if (R.match(Res, &Groups))
6789 Res = (Groups[1] + I128 + Groups[3]).str();
6790 }
6791 }
6792
6793 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6794 // Raising the alignment is safe because Clang did not produce f80 values in
6795 // the MSVC environment before this upgrade was added.
6796 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6797 StringRef Ref = Res;
6798 auto I = Ref.find("-f80:32-");
6799 if (I != StringRef::npos)
6800 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6801 }
6802
6803 return Res;
6804}
6805
6806void llvm::UpgradeAttributes(AttrBuilder &B) {
6807 StringRef FramePointer;
6808 Attribute A = B.getAttribute("no-frame-pointer-elim");
6809 if (A.isValid()) {
6810 // The value can be "true" or "false".
6811 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6812 B.removeAttribute("no-frame-pointer-elim");
6813 }
6814 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6815 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6816 if (FramePointer != "all")
6817 FramePointer = "non-leaf";
6818 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6819 }
6820 if (!FramePointer.empty())
6821 B.addAttribute("frame-pointer", FramePointer);
6822
6823 A = B.getAttribute("null-pointer-is-valid");
6824 if (A.isValid()) {
6825 // The value can be "true" or "false".
6826 bool NullPointerIsValid = A.getValueAsString() == "true";
6827 B.removeAttribute("null-pointer-is-valid");
6828 if (NullPointerIsValid)
6829 B.addAttribute(Attribute::NullPointerIsValid);
6830 }
6831
6832 A = B.getAttribute("uniform-work-group-size");
6833 if (A.isValid()) {
6834 StringRef Val = A.getValueAsString();
6835 if (!Val.empty()) {
6836 bool IsTrue = Val == "true";
6837 B.removeAttribute("uniform-work-group-size");
6838 if (IsTrue)
6839 B.addAttribute("uniform-work-group-size");
6840 }
6841 }
6842}
6843
6844void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6845 // clang.arc.attachedcall bundles are now required to have an operand.
6846 // If they don't, it's okay to drop them entirely: when there is an operand,
6847 // the "attachedcall" is meaningful and required, but without an operand,
6848 // it's just a marker NOP. Dropping it merely prevents an optimization.
6849 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6850 return OBD.getTag() == "clang.arc.attachedcall" &&
6851 OBD.inputs().empty();
6852 });
6853}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define R2(n)
This file contains the declarations for metadata subclasses.
#define T
#define T1
NVPTX address space definition.
uint64_t High
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DWARF expression.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setApproxFunc(bool B=true)
Definition FMF.h:96
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:246
const Function & getFunction() const
Definition Function.h:166
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Function.cpp:449
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
Argument * getArg(unsigned i) const
Definition Function.h:886
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:629
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
Base class for instruction visitors.
Definition InstVisitor.h:78
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
LLVMContext & getContext() const
Definition Metadata.h:1244
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
A single uniqued string.
Definition Metadata.h:722
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1529
Metadata wrapper in the Value hierarchy.
Definition Metadata.h:184
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition Module.h:117
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition Module.h:138
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition Module.h:120
@ Min
Takes the min of the two values, which are required to be integers.
Definition Module.h:152
@ Max
Takes the max of the two values, which are required to be integers.
Definition Module.h:149
A tuple of MDNodes.
Definition Metadata.h:1760
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
Definition Metadata.h:1856
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
StringRef getTag() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition Type.cpp:895
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
static constexpr size_t npos
Definition StringRef.h:58
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition StringRef.h:844
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:483
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:311
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:287
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:288
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:393
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
bool use_empty() const
Definition Value.h:346
bool hasName() const
Definition Value.h:261
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
LLVM_ABI bool hasStructReturnType(ID id)
Returns true if id has a struct return type.
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
Definition Metadata.h:709
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
@ Length
Definition DWP.cpp:557
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:328
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading, and returns true if it requires upgrading.
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case of optionals) value is accepted.
Definition Casting.h:732
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the function updated as well as all calls updated to the new function.
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition STLExtras.h:633
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address spaces: the cast is replaced by a pair of ptrtoint+inttoptr constant expressions.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction is replaced by a pair ptrtoint+inttoptr.
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ DEBUG_METADATA_VERSION
Definition Metadata.h:54
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106