LLVM 23.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/IR/AutoUpgrade.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/StringRef.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/CallingConv.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DebugInfo.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/InstVisitor.h"
31#include "llvm/IR/Instruction.h"
33#include "llvm/IR/Intrinsics.h"
34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
41#include "llvm/IR/LLVMContext.h"
42#include "llvm/IR/MDBuilder.h"
43#include "llvm/IR/Metadata.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/Value.h"
46#include "llvm/IR/Verifier.h"
52#include "llvm/Support/Regex.h"
55#include <cstdint>
56#include <cstring>
57#include <numeric>
58
59using namespace llvm;
60
// Boolean command-line option "disable-auto-upgrade-debug-info". Per its
// description string it disables auto-upgrade of debug info; the code that
// consults the flag is not part of this excerpt.
61static cl::opt<bool>
62 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
63 cl::desc("Disable autoupgrade of debug info"));
64
// Rename GV in place by appending ".old" to its current name. The upgrade
// helpers in this file call this on an outdated declaration before requesting
// the current intrinsic declaration.
65static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
66
67// Report a fatal usage error, first printing the call instruction that
68// caused it to llvm::errs().
69[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
70 CallBase *CI) {
   // Dump the offending call, followed by a newline, to the error stream.
71 CI->print(llvm::errs());
72 llvm::errs() << "\n";
   // NOTE(review): this listing jumps from line 72 to line 74; presumably the
   // omitted line raises the fatal error using `reason` (the function is
   // declared [[noreturn]]) -- confirm against the real source.
74}
75
76// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
77// changed their type from v4f32 to v2i64.
// Returns false when parameter 0 is not a fixed vector of 4 floats. Otherwise
// renames F, stores the declaration for IID in NewFn and returns true.
// NOTE(review): the first line of the signature (listing line 78) is missing
// from this excerpt; F and IID are presumably its other parameters.
79 Function *&NewFn) {
80 // Check whether this is an old version of the function, which received
81 // v4f32 arguments.
82 Type *Arg0Type = F->getFunctionType()->getParamType(0);
83 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
84 return false;
85
86 // Yes, it's old: move it aside and replace it with the new version.
87 rename(F);
88 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
89 return true;
90}
91
92// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
93// arguments have changed their type from i32 to i8.
// Returns false when the last parameter is not i32. Otherwise renames F,
// stores the declaration for IID in NewFn and returns true.
// NOTE(review): the first line of the signature (listing line 94) is missing
// from this excerpt; F and IID are presumably its other parameters.
95 Function *&NewFn) {
96 // Check that the last argument is an i32.
97 Type *LastArgType = F->getFunctionType()->getParamType(
98 F->getFunctionType()->getNumParams() - 1);
99 if (!LastArgType->isIntegerTy(32))
100 return false;
101
102 // Move this function aside and map it to the declaration for IID.
103 rename(F);
104 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
105 return true;
106}
107
108// Upgrade the declaration of fp compare intrinsics that change return type
109// from scalar to vXi1 mask.
// Returns false when F already returns a vector. Otherwise renames F, stores
// the declaration for IID in NewFn and returns true.
// NOTE(review): the first line of the signature (listing line 110) is missing
// from this excerpt; F and IID are presumably its other parameters.
111 Function *&NewFn) {
112 // Check if the return type is a vector; if so there is nothing to upgrade.
113 if (F->getReturnType()->isVectorTy())
114 return false;
115
    // Non-vector return type: move the old declaration aside and hand back the
    // declaration for IID.
116 rename(F);
117 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
118 return true;
119}
120
121// Upgrade the declaration of multiply and add bytes intrinsics whose input
122// arguments' types have changed from vectors of i32 to vectors of i8.
// Returns false when parameters 1 and 2 are both already vectors of i8.
// Otherwise renames F, stores the declaration for IID in NewFn and returns
// true.
// NOTE(review): the first line of the signature (listing line 123) is missing
// from this excerpt; F and IID are presumably its other parameters.
124 Function *&NewFn) {
125 // Check if the input argument types are already vectors of i8.
126 Type *Arg1Type = F->getFunctionType()->getParamType(1);
127 Type *Arg2Type = F->getFunctionType()->getParamType(2);
128 if (Arg1Type->isVectorTy() &&
129 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
130 Arg2Type->isVectorTy() &&
131 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
132 return false;
133
    // At least one of the two parameters is not a vector of i8: move the old
    // declaration aside and hand back the declaration for IID.
134 rename(F);
135 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
136 return true;
137}
138
139// Upgrade the declaration of multiply and add words intrinsics whose input
140// arguments' types have changed from vectors of i32 to vectors of i16.
// Returns false when parameters 1 and 2 are both already vectors of i16.
// Otherwise renames F, stores the declaration for IID in NewFn and returns
// true.
// NOTE(review): the first line of the signature (listing line 141) is missing
// from this excerpt; F and IID are presumably its other parameters.
142 Function *&NewFn) {
143 // Check if the input argument types are already vectors of i16.
144 Type *Arg1Type = F->getFunctionType()->getParamType(1);
145 Type *Arg2Type = F->getFunctionType()->getParamType(2);
146 if (Arg1Type->isVectorTy() &&
147 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
148 Arg2Type->isVectorTy() &&
149 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
150 return false;
151
    // At least one of the two parameters is not a vector of i16: move the old
    // declaration aside and hand back the declaration for IID.
152 rename(F);
153 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
154 return true;
155}
156
158 Function *&NewFn) {
    // NOTE(review): the first line of this signature (listing line 157) is not
    // part of this excerpt; F and IID are presumably its other parameters.
    // A return type whose scalar type is already bfloat needs no upgrade.
159 if (F->getReturnType()->getScalarType()->isBFloatTy())
160 return false;
161
    // Otherwise move the old declaration aside and hand back the declaration
    // for IID through NewFn.
162 rename(F);
163 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
164 return true;
165}
166
168 Function *&NewFn) {
    // NOTE(review): the first line of this signature (listing line 167) is not
    // part of this excerpt; F and IID are presumably its other parameters.
    // If parameter 1 already has bfloat as its scalar type, no upgrade is
    // needed.
169 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
170 return false;
171
    // Otherwise move the old declaration aside and hand back the declaration
    // for IID through NewFn.
172 rename(F);
173 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
174 return true;
175}
176
178 // All of the intrinsic matches below should be marked with which llvm
179 // version started autoupgrading them. At some point in the future we would
180 // like to use this information to remove upgrade code for some older
181 // intrinsics. It is currently undecided how we will determine that future
182 // point.
183 if (Name.consume_front("avx."))
184 return (Name.starts_with("blend.p") || // Added in 3.7
185 Name == "cvt.ps2.pd.256" || // Added in 3.9
186 Name == "cvtdq2.pd.256" || // Added in 3.9
187 Name == "cvtdq2.ps.256" || // Added in 7.0
188 Name.starts_with("movnt.") || // Added in 3.2
189 Name.starts_with("sqrt.p") || // Added in 7.0
190 Name.starts_with("storeu.") || // Added in 3.9
191 Name.starts_with("vbroadcast.s") || // Added in 3.5
192 Name.starts_with("vbroadcastf128") || // Added in 4.0
193 Name.starts_with("vextractf128.") || // Added in 3.7
194 Name.starts_with("vinsertf128.") || // Added in 3.7
195 Name.starts_with("vperm2f128.") || // Added in 6.0
196 Name.starts_with("vpermil.")); // Added in 3.1
197
198 if (Name.consume_front("avx2."))
199 return (Name == "movntdqa" || // Added in 5.0
200 Name.starts_with("pabs.") || // Added in 6.0
201 Name.starts_with("padds.") || // Added in 8.0
202 Name.starts_with("paddus.") || // Added in 8.0
203 Name.starts_with("pblendd.") || // Added in 3.7
204 Name == "pblendw" || // Added in 3.7
205 Name.starts_with("pbroadcast") || // Added in 3.8
206 Name.starts_with("pcmpeq.") || // Added in 3.1
207 Name.starts_with("pcmpgt.") || // Added in 3.1
208 Name.starts_with("pmax") || // Added in 3.9
209 Name.starts_with("pmin") || // Added in 3.9
210 Name.starts_with("pmovsx") || // Added in 3.9
211 Name.starts_with("pmovzx") || // Added in 3.9
212 Name == "pmul.dq" || // Added in 7.0
213 Name == "pmulu.dq" || // Added in 7.0
214 Name.starts_with("psll.dq") || // Added in 3.7
215 Name.starts_with("psrl.dq") || // Added in 3.7
216 Name.starts_with("psubs.") || // Added in 8.0
217 Name.starts_with("psubus.") || // Added in 8.0
218 Name.starts_with("vbroadcast") || // Added in 3.8
219 Name == "vbroadcasti128" || // Added in 3.7
220 Name == "vextracti128" || // Added in 3.7
221 Name == "vinserti128" || // Added in 3.7
222 Name == "vperm2i128"); // Added in 6.0
223
224 if (Name.consume_front("avx512.")) {
225 if (Name.consume_front("mask."))
226 // 'avx512.mask.*'
227 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
228 Name.starts_with("and.") || // Added in 3.9
229 Name.starts_with("andn.") || // Added in 3.9
230 Name.starts_with("broadcast.s") || // Added in 3.9
231 Name.starts_with("broadcastf32x4.") || // Added in 6.0
232 Name.starts_with("broadcastf32x8.") || // Added in 6.0
233 Name.starts_with("broadcastf64x2.") || // Added in 6.0
234 Name.starts_with("broadcastf64x4.") || // Added in 6.0
235 Name.starts_with("broadcasti32x4.") || // Added in 6.0
236 Name.starts_with("broadcasti32x8.") || // Added in 6.0
237 Name.starts_with("broadcasti64x2.") || // Added in 6.0
238 Name.starts_with("broadcasti64x4.") || // Added in 6.0
239 Name.starts_with("cmp.b") || // Added in 5.0
240 Name.starts_with("cmp.d") || // Added in 5.0
241 Name.starts_with("cmp.q") || // Added in 5.0
242 Name.starts_with("cmp.w") || // Added in 5.0
243 Name.starts_with("compress.b") || // Added in 9.0
244 Name.starts_with("compress.d") || // Added in 9.0
245 Name.starts_with("compress.p") || // Added in 9.0
246 Name.starts_with("compress.q") || // Added in 9.0
247 Name.starts_with("compress.store.") || // Added in 7.0
248 Name.starts_with("compress.w") || // Added in 9.0
249 Name.starts_with("conflict.") || // Added in 9.0
250 Name.starts_with("cvtdq2pd.") || // Added in 4.0
251 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
252 Name == "cvtpd2dq.256" || // Added in 7.0
253 Name == "cvtpd2ps.256" || // Added in 7.0
254 Name == "cvtps2pd.128" || // Added in 7.0
255 Name == "cvtps2pd.256" || // Added in 7.0
256 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
257 Name == "cvtqq2ps.256" || // Added in 9.0
258 Name == "cvtqq2ps.512" || // Added in 9.0
259 Name == "cvttpd2dq.256" || // Added in 7.0
260 Name == "cvttps2dq.128" || // Added in 7.0
261 Name == "cvttps2dq.256" || // Added in 7.0
262 Name.starts_with("cvtudq2pd.") || // Added in 4.0
263 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
264 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
265 Name == "cvtuqq2ps.256" || // Added in 9.0
266 Name == "cvtuqq2ps.512" || // Added in 9.0
267 Name.starts_with("dbpsadbw.") || // Added in 7.0
268 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
269 Name.starts_with("expand.b") || // Added in 9.0
270 Name.starts_with("expand.d") || // Added in 9.0
271 Name.starts_with("expand.load.") || // Added in 7.0
272 Name.starts_with("expand.p") || // Added in 9.0
273 Name.starts_with("expand.q") || // Added in 9.0
274 Name.starts_with("expand.w") || // Added in 9.0
275 Name.starts_with("fpclass.p") || // Added in 7.0
276 Name.starts_with("insert") || // Added in 4.0
277 Name.starts_with("load.") || // Added in 3.9
278 Name.starts_with("loadu.") || // Added in 3.9
279 Name.starts_with("lzcnt.") || // Added in 5.0
280 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
281 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
282 Name.starts_with("movddup") || // Added in 3.9
283 Name.starts_with("move.s") || // Added in 4.0
284 Name.starts_with("movshdup") || // Added in 3.9
285 Name.starts_with("movsldup") || // Added in 3.9
286 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
287 Name.starts_with("or.") || // Added in 3.9
288 Name.starts_with("pabs.") || // Added in 6.0
289 Name.starts_with("packssdw.") || // Added in 5.0
290 Name.starts_with("packsswb.") || // Added in 5.0
291 Name.starts_with("packusdw.") || // Added in 5.0
292 Name.starts_with("packuswb.") || // Added in 5.0
293 Name.starts_with("padd.") || // Added in 4.0
294 Name.starts_with("padds.") || // Added in 8.0
295 Name.starts_with("paddus.") || // Added in 8.0
296 Name.starts_with("palignr.") || // Added in 3.9
297 Name.starts_with("pand.") || // Added in 3.9
298 Name.starts_with("pandn.") || // Added in 3.9
299 Name.starts_with("pavg") || // Added in 6.0
300 Name.starts_with("pbroadcast") || // Added in 6.0
301 Name.starts_with("pcmpeq.") || // Added in 3.9
302 Name.starts_with("pcmpgt.") || // Added in 3.9
303 Name.starts_with("perm.df.") || // Added in 3.9
304 Name.starts_with("perm.di.") || // Added in 3.9
305 Name.starts_with("permvar.") || // Added in 7.0
306 Name.starts_with("pmaddubs.w.") || // Added in 7.0
307 Name.starts_with("pmaddw.d.") || // Added in 7.0
308 Name.starts_with("pmax") || // Added in 4.0
309 Name.starts_with("pmin") || // Added in 4.0
310 Name == "pmov.qd.256" || // Added in 9.0
311 Name == "pmov.qd.512" || // Added in 9.0
312 Name == "pmov.wb.256" || // Added in 9.0
313 Name == "pmov.wb.512" || // Added in 9.0
314 Name.starts_with("pmovsx") || // Added in 4.0
315 Name.starts_with("pmovzx") || // Added in 4.0
316 Name.starts_with("pmul.dq.") || // Added in 4.0
317 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
318 Name.starts_with("pmulh.w.") || // Added in 7.0
319 Name.starts_with("pmulhu.w.") || // Added in 7.0
320 Name.starts_with("pmull.") || // Added in 4.0
321 Name.starts_with("pmultishift.qb.") || // Added in 8.0
322 Name.starts_with("pmulu.dq.") || // Added in 4.0
323 Name.starts_with("por.") || // Added in 3.9
324 Name.starts_with("prol.") || // Added in 8.0
325 Name.starts_with("prolv.") || // Added in 8.0
326 Name.starts_with("pror.") || // Added in 8.0
327 Name.starts_with("prorv.") || // Added in 8.0
328 Name.starts_with("pshuf.b.") || // Added in 4.0
329 Name.starts_with("pshuf.d.") || // Added in 3.9
330 Name.starts_with("pshufh.w.") || // Added in 3.9
331 Name.starts_with("pshufl.w.") || // Added in 3.9
332 Name.starts_with("psll.d") || // Added in 4.0
333 Name.starts_with("psll.q") || // Added in 4.0
334 Name.starts_with("psll.w") || // Added in 4.0
335 Name.starts_with("pslli") || // Added in 4.0
336 Name.starts_with("psllv") || // Added in 4.0
337 Name.starts_with("psra.d") || // Added in 4.0
338 Name.starts_with("psra.q") || // Added in 4.0
339 Name.starts_with("psra.w") || // Added in 4.0
340 Name.starts_with("psrai") || // Added in 4.0
341 Name.starts_with("psrav") || // Added in 4.0
342 Name.starts_with("psrl.d") || // Added in 4.0
343 Name.starts_with("psrl.q") || // Added in 4.0
344 Name.starts_with("psrl.w") || // Added in 4.0
345 Name.starts_with("psrli") || // Added in 4.0
346 Name.starts_with("psrlv") || // Added in 4.0
347 Name.starts_with("psub.") || // Added in 4.0
348 Name.starts_with("psubs.") || // Added in 8.0
349 Name.starts_with("psubus.") || // Added in 8.0
350 Name.starts_with("pternlog.") || // Added in 7.0
351 Name.starts_with("punpckh") || // Added in 3.9
352 Name.starts_with("punpckl") || // Added in 3.9
353 Name.starts_with("pxor.") || // Added in 3.9
354 Name.starts_with("shuf.f") || // Added in 6.0
355 Name.starts_with("shuf.i") || // Added in 6.0
356 Name.starts_with("shuf.p") || // Added in 4.0
357 Name.starts_with("sqrt.p") || // Added in 7.0
358 Name.starts_with("store.b.") || // Added in 3.9
359 Name.starts_with("store.d.") || // Added in 3.9
360 Name.starts_with("store.p") || // Added in 3.9
361 Name.starts_with("store.q.") || // Added in 3.9
362 Name.starts_with("store.w.") || // Added in 3.9
363 Name == "store.ss" || // Added in 7.0
364 Name.starts_with("storeu.") || // Added in 3.9
365 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
366 Name.starts_with("ucmp.") || // Added in 5.0
367 Name.starts_with("unpckh.") || // Added in 3.9
368 Name.starts_with("unpckl.") || // Added in 3.9
369 Name.starts_with("valign.") || // Added in 4.0
370 Name == "vcvtph2ps.128" || // Added in 11.0
371 Name == "vcvtph2ps.256" || // Added in 11.0
372 Name.starts_with("vextract") || // Added in 4.0
373 Name.starts_with("vfmadd.") || // Added in 7.0
374 Name.starts_with("vfmaddsub.") || // Added in 7.0
375 Name.starts_with("vfnmadd.") || // Added in 7.0
376 Name.starts_with("vfnmsub.") || // Added in 7.0
377 Name.starts_with("vpdpbusd.") || // Added in 7.0
378 Name.starts_with("vpdpbusds.") || // Added in 7.0
379 Name.starts_with("vpdpwssd.") || // Added in 7.0
380 Name.starts_with("vpdpwssds.") || // Added in 7.0
381 Name.starts_with("vpermi2var.") || // Added in 7.0
382 Name.starts_with("vpermil.p") || // Added in 3.9
383 Name.starts_with("vpermilvar.") || // Added in 4.0
384 Name.starts_with("vpermt2var.") || // Added in 7.0
385 Name.starts_with("vpmadd52") || // Added in 7.0
386 Name.starts_with("vpshld.") || // Added in 7.0
387 Name.starts_with("vpshldv.") || // Added in 8.0
388 Name.starts_with("vpshrd.") || // Added in 7.0
389 Name.starts_with("vpshrdv.") || // Added in 8.0
390 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
391 Name.starts_with("xor.")); // Added in 3.9
392
393 if (Name.consume_front("mask3."))
394 // 'avx512.mask3.*'
395 return (Name.starts_with("vfmadd.") || // Added in 7.0
396 Name.starts_with("vfmaddsub.") || // Added in 7.0
397 Name.starts_with("vfmsub.") || // Added in 7.0
398 Name.starts_with("vfmsubadd.") || // Added in 7.0
399 Name.starts_with("vfnmsub.")); // Added in 7.0
400
401 if (Name.consume_front("maskz."))
402 // 'avx512.maskz.*'
403 return (Name.starts_with("pternlog.") || // Added in 7.0
404 Name.starts_with("vfmadd.") || // Added in 7.0
405 Name.starts_with("vfmaddsub.") || // Added in 7.0
406 Name.starts_with("vpdpbusd.") || // Added in 7.0
407 Name.starts_with("vpdpbusds.") || // Added in 7.0
408 Name.starts_with("vpdpwssd.") || // Added in 7.0
409 Name.starts_with("vpdpwssds.") || // Added in 7.0
410 Name.starts_with("vpermt2var.") || // Added in 7.0
411 Name.starts_with("vpmadd52") || // Added in 7.0
412 Name.starts_with("vpshldv.") || // Added in 8.0
413 Name.starts_with("vpshrdv.")); // Added in 8.0
414
415 // 'avx512.*'
416 return (Name == "movntdqa" || // Added in 5.0
417 Name == "pmul.dq.512" || // Added in 7.0
418 Name == "pmulu.dq.512" || // Added in 7.0
419 Name.starts_with("broadcastm") || // Added in 6.0
420 Name.starts_with("cmp.p") || // Added in 12.0
421 Name.starts_with("cvtb2mask.") || // Added in 7.0
422 Name.starts_with("cvtd2mask.") || // Added in 7.0
423 Name.starts_with("cvtmask2") || // Added in 5.0
424 Name.starts_with("cvtq2mask.") || // Added in 7.0
425 Name == "cvtusi2sd" || // Added in 7.0
426 Name.starts_with("cvtw2mask.") || // Added in 7.0
427 Name == "kand.w" || // Added in 7.0
428 Name == "kandn.w" || // Added in 7.0
429 Name == "knot.w" || // Added in 7.0
430 Name == "kor.w" || // Added in 7.0
431 Name == "kortestc.w" || // Added in 7.0
432 Name == "kortestz.w" || // Added in 7.0
433 Name.starts_with("kunpck") || // added in 6.0
434 Name == "kxnor.w" || // Added in 7.0
435 Name == "kxor.w" || // Added in 7.0
436 Name.starts_with("padds.") || // Added in 8.0
437 Name.starts_with("pbroadcast") || // Added in 3.9
438 Name.starts_with("prol") || // Added in 8.0
439 Name.starts_with("pror") || // Added in 8.0
440 Name.starts_with("psll.dq") || // Added in 3.9
441 Name.starts_with("psrl.dq") || // Added in 3.9
442 Name.starts_with("psubs.") || // Added in 8.0
443 Name.starts_with("ptestm") || // Added in 6.0
444 Name.starts_with("ptestnm") || // Added in 6.0
445 Name.starts_with("storent.") || // Added in 3.9
446 Name.starts_with("vbroadcast.s") || // Added in 7.0
447 Name.starts_with("vpshld.") || // Added in 8.0
448 Name.starts_with("vpshrd.")); // Added in 8.0
449 }
450
451 if (Name.consume_front("fma."))
452 return (Name.starts_with("vfmadd.") || // Added in 7.0
453 Name.starts_with("vfmsub.") || // Added in 7.0
454 Name.starts_with("vfmsubadd.") || // Added in 7.0
455 Name.starts_with("vfnmadd.") || // Added in 7.0
456 Name.starts_with("vfnmsub.")); // Added in 7.0
457
458 if (Name.consume_front("fma4."))
459 return Name.starts_with("vfmadd.s"); // Added in 7.0
460
461 if (Name.consume_front("sse."))
462 return (Name == "add.ss" || // Added in 4.0
463 Name == "cvtsi2ss" || // Added in 7.0
464 Name == "cvtsi642ss" || // Added in 7.0
465 Name == "div.ss" || // Added in 4.0
466 Name == "mul.ss" || // Added in 4.0
467 Name.starts_with("sqrt.p") || // Added in 7.0
468 Name == "sqrt.ss" || // Added in 7.0
469 Name.starts_with("storeu.") || // Added in 3.9
470 Name == "sub.ss"); // Added in 4.0
471
472 if (Name.consume_front("sse2."))
473 return (Name == "add.sd" || // Added in 4.0
474 Name == "cvtdq2pd" || // Added in 3.9
475 Name == "cvtdq2ps" || // Added in 7.0
476 Name == "cvtps2pd" || // Added in 3.9
477 Name == "cvtsi2sd" || // Added in 7.0
478 Name == "cvtsi642sd" || // Added in 7.0
479 Name == "cvtss2sd" || // Added in 7.0
480 Name == "div.sd" || // Added in 4.0
481 Name == "mul.sd" || // Added in 4.0
482 Name.starts_with("padds.") || // Added in 8.0
483 Name.starts_with("paddus.") || // Added in 8.0
484 Name.starts_with("pcmpeq.") || // Added in 3.1
485 Name.starts_with("pcmpgt.") || // Added in 3.1
486 Name == "pmaxs.w" || // Added in 3.9
487 Name == "pmaxu.b" || // Added in 3.9
488 Name == "pmins.w" || // Added in 3.9
489 Name == "pminu.b" || // Added in 3.9
490 Name == "pmulu.dq" || // Added in 7.0
491 Name.starts_with("pshuf") || // Added in 3.9
492 Name.starts_with("psll.dq") || // Added in 3.7
493 Name.starts_with("psrl.dq") || // Added in 3.7
494 Name.starts_with("psubs.") || // Added in 8.0
495 Name.starts_with("psubus.") || // Added in 8.0
496 Name.starts_with("sqrt.p") || // Added in 7.0
497 Name == "sqrt.sd" || // Added in 7.0
498 Name == "storel.dq" || // Added in 3.9
499 Name.starts_with("storeu.") || // Added in 3.9
500 Name == "sub.sd"); // Added in 4.0
501
502 if (Name.consume_front("sse41."))
503 return (Name.starts_with("blendp") || // Added in 3.7
504 Name == "movntdqa" || // Added in 5.0
505 Name == "pblendw" || // Added in 3.7
506 Name == "pmaxsb" || // Added in 3.9
507 Name == "pmaxsd" || // Added in 3.9
508 Name == "pmaxud" || // Added in 3.9
509 Name == "pmaxuw" || // Added in 3.9
510 Name == "pminsb" || // Added in 3.9
511 Name == "pminsd" || // Added in 3.9
512 Name == "pminud" || // Added in 3.9
513 Name == "pminuw" || // Added in 3.9
514 Name.starts_with("pmovsx") || // Added in 3.8
515 Name.starts_with("pmovzx") || // Added in 3.9
516 Name == "pmuldq"); // Added in 7.0
517
518 if (Name.consume_front("sse42."))
519 return Name == "crc32.64.8"; // Added in 3.4
520
521 if (Name.consume_front("sse4a."))
522 return Name.starts_with("movnt."); // Added in 3.9
523
524 if (Name.consume_front("ssse3."))
525 return (Name == "pabs.b.128" || // Added in 6.0
526 Name == "pabs.d.128" || // Added in 6.0
527 Name == "pabs.w.128"); // Added in 6.0
528
529 if (Name.consume_front("xop."))
530 return (Name == "vpcmov" || // Added in 3.8
531 Name == "vpcmov.256" || // Added in 5.0
532 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
533 Name.starts_with("vprot")); // Added in 8.0
534
535 return (Name == "addcarry.u32" || // Added in 8.0
536 Name == "addcarry.u64" || // Added in 8.0
537 Name == "addcarryx.u32" || // Added in 8.0
538 Name == "addcarryx.u64" || // Added in 8.0
539 Name == "subborrow.u32" || // Added in 8.0
540 Name == "subborrow.u64" || // Added in 8.0
541 Name.starts_with("vcvtph2ps.")); // Added in 11.0
542}
543
545 Function *&NewFn) {
546 // Only handle intrinsics that start with "x86.".
547 if (!Name.consume_front("x86."))
548 return false;
549
550 if (shouldUpgradeX86Intrinsic(F, Name)) {
551 NewFn = nullptr;
552 return true;
553 }
554
555 if (Name == "rdtscp") { // Added in 8.0
556 // If this intrinsic has 0 operands, it's the new version.
557 if (F->getFunctionType()->getNumParams() == 0)
558 return false;
559
560 rename(F);
561 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
562 Intrinsic::x86_rdtscp);
563 return true;
564 }
565
567
568 // SSE4.1 ptest functions may have an old signature.
569 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
571 .Case("c", Intrinsic::x86_sse41_ptestc)
572 .Case("z", Intrinsic::x86_sse41_ptestz)
573 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
576 return upgradePTESTIntrinsic(F, ID, NewFn);
577
578 return false;
579 }
580
581 // Several blend and other instructions with masks used the wrong number of
582 // bits.
583
584 // Added in 3.6
586 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
587 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
588 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
589 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
590 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
591 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
594 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
595
596 if (Name.consume_front("avx512.")) {
597 if (Name.consume_front("mask.cmp.")) {
598 // Added in 7.0
600 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
608 return upgradeX86MaskedFPCompare(F, ID, NewFn);
609 } else if (Name.starts_with("vpdpbusd.") ||
610 Name.starts_with("vpdpbusds.")) {
611 // Added in 21.1
613 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
614 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
615 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
616 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
617 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
618 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
621 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
622 } else if (Name.starts_with("vpdpwssd.") ||
623 Name.starts_with("vpdpwssds.")) {
624 // Added in 21.1
626 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
627 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
628 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
629 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
630 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
631 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
634 return upgradeX86MultiplyAddWords(F, ID, NewFn);
635 }
636 return false; // No other 'x86.avx512.*'.
637 }
638
639 if (Name.consume_front("avx2.")) {
640 if (Name.consume_front("vpdpb")) {
641 // Added in 21.1
643 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
644 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
645 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
646 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
647 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
648 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
649 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
650 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
651 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
652 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
653 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
654 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
657 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
658 } else if (Name.consume_front("vpdpw")) {
659 // Added in 21.1
661 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
662 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
663 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
664 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
665 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
666 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
667 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
668 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
669 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
670 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
671 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
672 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
675 return upgradeX86MultiplyAddWords(F, ID, NewFn);
676 }
677 return false; // No other 'x86.avx2.*'
678 }
679
680 if (Name.consume_front("avx10.")) {
681 if (Name.consume_front("vpdpb")) {
682 // Added in 21.1
684 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
685 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
686 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
687 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
688 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
689 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
692 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
693 } else if (Name.consume_front("vpdpw")) {
695 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
696 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
697 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
698 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
699 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
700 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
703 return upgradeX86MultiplyAddWords(F, ID, NewFn);
704 }
705 return false; // No other 'x86.avx10.*'
706 }
707
708 if (Name.consume_front("avx512bf16.")) {
709 // Added in 9.0
711 .Case("cvtne2ps2bf16.128",
712 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .Case("cvtne2ps2bf16.256",
714 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .Case("cvtne2ps2bf16.512",
716 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .Case("mask.cvtneps2bf16.128",
718 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .Case("cvtneps2bf16.256",
720 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .Case("cvtneps2bf16.512",
722 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
725 return upgradeX86BF16Intrinsic(F, ID, NewFn);
726
727 // Added in 9.0
729 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
734 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
735 return false; // No other 'x86.avx512bf16.*'.
736 }
737
738 if (Name.consume_front("xop.")) {
740 if (Name.starts_with("vpermil2")) { // Added in 3.9
741 // Upgrade any XOP PERMIL2 index operand still using a float/double
742 // vector.
743 auto Idx = F->getFunctionType()->getParamType(2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
753 else
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
755 }
756 } else if (F->arg_size() == 2)
757 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
759 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
760 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
762
764 rename(F);
765 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
766 return true;
767 }
768 return false; // No other 'x86.xop.*'
769 }
770
771 if (Name == "seh.recoverfp") {
772 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
773 Intrinsic::eh_recoverfp);
774 return true;
775 }
776
777 return false;
778}
779
780// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic fns. Return true iff F is upgraded.
781// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
783 StringRef Name,
784 Function *&NewFn) {
785 if (Name.starts_with("rbit")) {
786 // '(arm|aarch64).rbit'.
788 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
789 return true;
790 }
791
792 if (Name == "thread.pointer") {
793 // '(arm|aarch64).thread.pointer'.
795 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
796 return true;
797 }
798
799 bool Neon = Name.consume_front("neon.");
800 if (Neon) {
801 // '(arm|aarch64).neon.*'.
802 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
803 // v16i8 respectively.
804 if (Name.consume_front("bfdot.")) {
805 // '(arm|aarch64).neon.bfdot.*'.
808 .Cases({"v2f32.v8i8", "v4f32.v16i8"},
809 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
810 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
813 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
814 assert((OperandWidth == 64 || OperandWidth == 128) &&
815 "Unexpected operand width");
816 LLVMContext &Ctx = F->getParent()->getContext();
817 std::array<Type *, 2> Tys{
818 {F->getReturnType(),
819 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
820 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
821 return true;
822 }
823 return false; // No other '(arm|aarch64).neon.bfdot.*'.
824 }
825
826 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
827 // anymore and accept v8bf16 instead of v16i8.
828 if (Name.consume_front("bfm")) {
829 // '(arm|aarch64).neon.bfm*'.
830 if (Name.consume_back(".v4f32.v16i8")) {
831 // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
834 .Case("mla",
835 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
836 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
837 .Case("lalb",
838 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
839 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
840 .Case("lalt",
841 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
842 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
845 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
846 return true;
847 }
848 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
849 }
850 return false; // No other '(arm|aarch64).neon.bfm*'.
851 }
852 // Continue on to Aarch64 Neon or Arm Neon.
853 }
854 // Continue on to Arm or Aarch64.
855
856 if (IsArm) {
857 // 'arm.*'.
858 if (Neon) {
859 // 'arm.neon.*'.
861 .StartsWith("vclz.", Intrinsic::ctlz)
862 .StartsWith("vcnt.", Intrinsic::ctpop)
863 .StartsWith("vqadds.", Intrinsic::sadd_sat)
864 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
865 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
866 .StartsWith("vqsubu.", Intrinsic::usub_sat)
867 .StartsWith("vrinta.", Intrinsic::round)
868 .StartsWith("vrintn.", Intrinsic::roundeven)
869 .StartsWith("vrintm.", Intrinsic::floor)
870 .StartsWith("vrintp.", Intrinsic::ceil)
871 .StartsWith("vrintx.", Intrinsic::rint)
872 .StartsWith("vrintz.", Intrinsic::trunc)
875 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
876 F->arg_begin()->getType());
877 return true;
878 }
879
880 if (Name.consume_front("vst")) {
881 // 'arm.neon.vst*'.
882 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
884 if (vstRegex.match(Name, &Groups)) {
885 static const Intrinsic::ID StoreInts[] = {
886 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
887 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
888
889 static const Intrinsic::ID StoreLaneInts[] = {
890 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
891 Intrinsic::arm_neon_vst4lane};
892
893 auto fArgs = F->getFunctionType()->params();
894 Type *Tys[] = {fArgs[0], fArgs[1]};
895 if (Groups[1].size() == 1)
897 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
898 else
900 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
901 return true;
902 }
903 return false; // No other 'arm.neon.vst*'.
904 }
905
906 return false; // No other 'arm.neon.*'.
907 }
908
909 if (Name.consume_front("mve.")) {
910 // 'arm.mve.*'.
911 if (Name == "vctp64") {
912 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
913 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
914 // the function and deal with it below in UpgradeIntrinsicCall.
915 rename(F);
916 return true;
917 }
918 return false; // 'arm.mve.vctp64' without a 4-element result: no upgrade.
919 }
920
921 if (Name.starts_with("vrintn.v")) {
923 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
924 return true;
925 }
926
927 // These too are changed to accept a v2i1 instead of the old v4i1.
928 if (Name.consume_back(".v4i1")) {
929 // 'arm.mve.*.v4i1'.
930 if (Name.consume_back(".predicated.v2i64.v4i32"))
931 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
932 return Name == "mull.int" || Name == "vqdmull";
933
934 if (Name.consume_back(".v2i64")) {
935 // 'arm.mve.*.v2i64.v4i1'
936 bool IsGather = Name.consume_front("vldr.gather.");
937 if (IsGather || Name.consume_front("vstr.scatter.")) {
938 if (Name.consume_front("base.")) {
939 // Optional 'wb.' prefix.
940 Name.consume_front("wb.");
941 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
942 // predicated.v2i64.v2i64.v4i1'.
943 return Name == "predicated.v2i64";
944 }
945
946 if (Name.consume_front("offset.predicated."))
947 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
948 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
949
950 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
951 return false;
952 }
953
954 return false; // No other 'arm.mve.*.v2i64.v4i1'.
955 }
956 return false; // No other 'arm.mve.*.v4i1'.
957 }
958 return false; // No other 'arm.mve.*'.
959 }
960
961 if (Name.consume_front("cde.vcx")) {
962 // 'arm.cde.vcx*'.
963 if (Name.consume_back(".predicated.v2i64.v4i1"))
964 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
965 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
966 Name == "3q" || Name == "3qa";
967
968 return false; // No other 'arm.cde.vcx*'.
969 }
970 } else {
971 // 'aarch64.*'.
972 if (Neon) {
973 // 'aarch64.neon.*'.
975 .StartsWith("frintn", Intrinsic::roundeven)
976 .StartsWith("rbit", Intrinsic::bitreverse)
979 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
980 F->arg_begin()->getType());
981 return true;
982 }
983
984 if (Name.starts_with("addp")) {
985 // 'aarch64.neon.addp*'.
986 if (F->arg_size() != 2)
987 return false; // Invalid IR.
988 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
989 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
991 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
992 return true;
993 }
994 }
995
996 // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
997 if (Name.starts_with("bfcvt")) {
998 NewFn = nullptr;
999 return true;
1000 }
1001
1002 return false; // No other 'aarch64.neon.*'.
1003 }
1004 if (Name.consume_front("sve.")) {
1005 // 'aarch64.sve.*'.
1006 if (Name.consume_front("bf")) {
1007 if (Name == "mmla") {
1008 Type *Tys[] = {F->getReturnType(),
1009 std::next(F->arg_begin())->getType()};
1011 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1012 return true;
1013 }
1014 if (Name.consume_back(".lane")) {
1015 // 'aarch64.sve.bf*.lane'.
1018 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1019 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1020 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1023 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1024 return true;
1025 }
1026 return false; // No other 'aarch64.sve.bf*.lane'.
1027 }
1028 return false; // No other 'aarch64.sve.bf*'.
1029 }
1030
1031 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
1032 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1033 NewFn = nullptr;
1034 return true;
1035 }
1036
1037 if (Name.consume_front("addqv")) {
1038 // 'aarch64.sve.addqv'.
1039 if (!F->getReturnType()->isFPOrFPVectorTy())
1040 return false;
1041
1042 auto Args = F->getFunctionType()->params();
1043 Type *Tys[] = {F->getReturnType(), Args[1]};
1045 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1046 return true;
1047 }
1048
1049 if (Name.consume_front("ld")) {
1050 // 'aarch64.sve.ld*'.
1051 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1052 if (LdRegex.match(Name)) {
1053 Type *ScalarTy =
1054 cast<VectorType>(F->getReturnType())->getElementType();
1055 ElementCount EC =
1056 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
1057 assert(F->arg_size() == 2 &&
1058 "Expected 2 arguments for ld* intrinsic.");
1059 Type *PtrTy = F->getArg(1)->getType();
1060 Type *Ty = VectorType::get(ScalarTy, EC);
1061 static const Intrinsic::ID LoadIDs[] = {
1062 Intrinsic::aarch64_sve_ld2_sret,
1063 Intrinsic::aarch64_sve_ld3_sret,
1064 Intrinsic::aarch64_sve_ld4_sret,
1065 };
1067 F->getParent(), LoadIDs[Name[0] - '2'], {Ty, PtrTy});
1068 return true;
1069 }
1070 return false; // No other 'aarch64.sve.ld*'.
1071 }
1072
1073 if (Name.consume_front("tuple.")) {
1074 // 'aarch64.sve.tuple.*'.
1075 if (Name.starts_with("get")) {
1076 // 'aarch64.sve.tuple.get*'.
1077 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1079 F->getParent(), Intrinsic::vector_extract, Tys);
1080 return true;
1081 }
1082
1083 if (Name.starts_with("set")) {
1084 // 'aarch64.sve.tuple.set*'.
1085 auto Args = F->getFunctionType()->params();
1086 Type *Tys[] = {Args[0], Args[2], Args[1]};
1088 F->getParent(), Intrinsic::vector_insert, Tys);
1089 return true;
1090 }
1091
1092 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1093 if (CreateTupleRegex.match(Name)) {
1094 // 'aarch64.sve.tuple.create*'.
1095 auto Args = F->getFunctionType()->params();
1096 Type *Tys[] = {F->getReturnType(), Args[1]};
1098 F->getParent(), Intrinsic::vector_insert, Tys);
1099 return true;
1100 }
1101 return false; // No other 'aarch64.sve.tuple.*'.
1102 }
1103
1104 if (Name.starts_with("rev.nxv")) {
1105 // 'aarch64.sve.rev.<Ty>'
1107 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
1108 return true;
1109 }
1110
1111 return false; // No other 'aarch64.sve.*'.
1112 }
1113 }
1114 return false; // No other 'arm.*', 'aarch64.*'.
1115}
1116
1118 StringRef Name) {
1119 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
1122 .Case("im2col.3d",
1123 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1124 .Case("im2col.4d",
1125 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1126 .Case("im2col.5d",
1127 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1128 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1129 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1130 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1131 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1132 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1134
1136 return ID;
1137
1138 // These intrinsics may need upgrade for two reasons:
1139 // (1) When the address-space of the first argument is shared[AS=3]
1140 // (and we upgrade it to use shared_cluster address-space[AS=7])
1141 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1143 return ID;
1144
1145 // (2) When there are only two boolean flag arguments at the end:
1146 //
1147 // The last three parameters of the older version of these
1148 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1149 //
1150 // The newer version reads as:
1151 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1152 //
1153 // So, when the type of the [N-3]rd argument is "not i1", then
1154 // it is the older version and we need to upgrade.
1155 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1156 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
1157 if (!ArgType->isIntegerTy(1))
1158 return ID;
1159 }
1160
1162}
1163
1165 StringRef Name) {
1166 if (Name.consume_front("mapa.shared.cluster"))
1167 if (F->getReturnType()->getPointerAddressSpace() ==
1169 return Intrinsic::nvvm_mapa_shared_cluster;
1170
1171 if (Name.consume_front("cp.async.bulk.")) {
1174 .Case("global.to.shared.cluster",
1175 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1176 .Case("shared.cta.to.cluster",
1177 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1179
1181 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1183 return ID;
1184 }
1185
1187}
1188
1190 if (Name.consume_front("fma.rn."))
1191 return StringSwitch<Intrinsic::ID>(Name)
1192 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1193 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1194 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1195 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1197
1198 if (Name.consume_front("fmax."))
1199 return StringSwitch<Intrinsic::ID>(Name)
1200 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1201 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1202 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1203 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1204 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1205 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1206 .Case("ftz.nan.xorsign.abs.bf16",
1207 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1208 .Case("ftz.nan.xorsign.abs.bf16x2",
1209 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1210 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1211 .Case("ftz.xorsign.abs.bf16x2",
1212 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1213 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1214 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1215 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1216 .Case("nan.xorsign.abs.bf16x2",
1217 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1218 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1219 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1221
1222 if (Name.consume_front("fmin."))
1223 return StringSwitch<Intrinsic::ID>(Name)
1224 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1225 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1226 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1227 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1228 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1229 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1230 .Case("ftz.nan.xorsign.abs.bf16",
1231 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1232 .Case("ftz.nan.xorsign.abs.bf16x2",
1233 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1234 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1235 .Case("ftz.xorsign.abs.bf16x2",
1236 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1237 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1238 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1239 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1240 .Case("nan.xorsign.abs.bf16x2",
1241 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1242 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1243 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1245
1246 if (Name.consume_front("neg."))
1247 return StringSwitch<Intrinsic::ID>(Name)
1248 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1249 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1251
1253}
1254
1256 return Name.consume_front("local") || Name.consume_front("shared") ||
1257 Name.consume_front("global") || Name.consume_front("constant") ||
1258 Name.consume_front("param");
1259}
1260
1262 const FunctionType *FuncTy) {
1263 Type *HalfTy = Type::getHalfTy(FuncTy->getContext());
1264 if (Name.starts_with("to.fp16")) {
1265 return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0),
1266 HalfTy) &&
1267 CastInst::castIsValid(Instruction::BitCast, HalfTy,
1268 FuncTy->getReturnType());
1269 }
1270
1271 if (Name.starts_with("from.fp16")) {
1272 return CastInst::castIsValid(Instruction::BitCast, FuncTy->getParamType(0),
1273 HalfTy) &&
1274 CastInst::castIsValid(Instruction::FPExt, HalfTy,
1275 FuncTy->getReturnType());
1276 }
1277
1278 return false;
1279}
1280
1282 bool CanUpgradeDebugIntrinsicsToRecords) {
1283 assert(F && "Illegal to upgrade a non-existent Function.");
1284
1285 StringRef Name = F->getName();
1286
1287 // Quickly eliminate it, if it's not a candidate.
1288 if (!Name.consume_front("llvm.") || Name.empty())
1289 return false;
1290
1291 switch (Name[0]) {
1292 default: break;
1293 case 'a': {
1294 bool IsArm = Name.consume_front("arm.");
1295 if (IsArm || Name.consume_front("aarch64.")) {
1296 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1297 return true;
1298 break;
1299 }
1300
1301 if (Name.consume_front("amdgcn.")) {
1302 if (Name == "alignbit") {
1303 // Target specific intrinsic became redundant
1305 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1306 return true;
1307 }
1308
1309 if (Name.consume_front("atomic.")) {
1310 if (Name.starts_with("inc") || Name.starts_with("dec") ||
1311 Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1312 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1313 // and usub_sat so there's no new declaration.
1314 NewFn = nullptr;
1315 return true;
1316 }
1317 break; // No other 'amdgcn.atomic.*'
1318 }
1319
1320 switch (F->getIntrinsicID()) {
1321 default:
1322 break;
1323 // Legacy wmma iu intrinsics without the optional clamp operand.
1324 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1325 if (F->arg_size() == 7) {
1326 NewFn = nullptr;
1327 return true;
1328 }
1329 break;
1330 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1331 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1332 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1333 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1334 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1335 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1336 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1337 if (F->arg_size() == 8) {
1338 NewFn = nullptr;
1339 return true;
1340 }
1341 break;
1342 }
1343
1344 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1345 Name.consume_front("flat.atomic.")) {
1346 if (Name.starts_with("fadd") ||
1347 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1348 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1349 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1350 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1351 // declaration.
1352 NewFn = nullptr;
1353 return true;
1354 }
1355 }
1356
1357 if (Name.starts_with("ldexp.")) {
1358 // Target specific intrinsic became redundant
1360 F->getParent(), Intrinsic::ldexp,
1361 {F->getReturnType(), F->getArg(1)->getType()});
1362 return true;
1363 }
1364 break; // No other 'amdgcn.*'
1365 }
1366
1367 break;
1368 }
1369 case 'c': {
1370 if (F->arg_size() == 1) {
1371 if (Name.consume_front("convert.")) {
1372 if (convertIntrinsicValidType(Name, F->getFunctionType())) {
1373 NewFn = nullptr;
1374 return true;
1375 }
1376 }
1377
1379 .StartsWith("ctlz.", Intrinsic::ctlz)
1380 .StartsWith("cttz.", Intrinsic::cttz)
1383 rename(F);
1384 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1385 F->arg_begin()->getType());
1386 return true;
1387 }
1388 }
1389
1390 if (F->arg_size() == 2 && Name == "coro.end") {
1391 rename(F);
1392 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1393 Intrinsic::coro_end);
1394 return true;
1395 }
1396
1397 break;
1398 }
1399 case 'd':
1400 if (Name.consume_front("dbg.")) {
1401 // Mark debug intrinsics for upgrade to new debug format.
1402 if (CanUpgradeDebugIntrinsicsToRecords) {
1403 if (Name == "addr" || Name == "value" || Name == "assign" ||
1404 Name == "declare" || Name == "label") {
1405 // There's no function to replace these with.
1406 NewFn = nullptr;
1407 // But we do want these to get upgraded.
1408 return true;
1409 }
1410 }
1411 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1412 // converted to DbgVariableRecords later.
1413 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1414 rename(F);
1415 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1416 Intrinsic::dbg_value);
1417 return true;
1418 }
1419 break; // No other 'dbg.*'.
1420 }
1421 break;
1422 case 'e':
1423 if (Name.consume_front("experimental.vector.")) {
1426 // Skip over extract.last.active, otherwise it will be 'upgraded'
1427 // to a regular vector extract which is a different operation.
1428 .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1429 .StartsWith("extract.", Intrinsic::vector_extract)
1430 .StartsWith("insert.", Intrinsic::vector_insert)
1431 .StartsWith("reverse.", Intrinsic::vector_reverse)
1432 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1433 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1434 .StartsWith("partial.reduce.add",
1435 Intrinsic::vector_partial_reduce_add)
1438 const auto *FT = F->getFunctionType();
1440 if (ID == Intrinsic::vector_extract ||
1441 ID == Intrinsic::vector_interleave2)
1442 // Extracting overloads the return type.
1443 Tys.push_back(FT->getReturnType());
1444 if (ID != Intrinsic::vector_interleave2)
1445 Tys.push_back(FT->getParamType(0));
1446 if (ID == Intrinsic::vector_insert ||
1447 ID == Intrinsic::vector_partial_reduce_add)
1448 // Inserting overloads the inserted type.
1449 Tys.push_back(FT->getParamType(1));
1450 rename(F);
1451 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1452 return true;
1453 }
1454
1455 if (Name.consume_front("reduce.")) {
1457 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1458 if (R.match(Name, &Groups))
1460 .Case("add", Intrinsic::vector_reduce_add)
1461 .Case("mul", Intrinsic::vector_reduce_mul)
1462 .Case("and", Intrinsic::vector_reduce_and)
1463 .Case("or", Intrinsic::vector_reduce_or)
1464 .Case("xor", Intrinsic::vector_reduce_xor)
1465 .Case("smax", Intrinsic::vector_reduce_smax)
1466 .Case("smin", Intrinsic::vector_reduce_smin)
1467 .Case("umax", Intrinsic::vector_reduce_umax)
1468 .Case("umin", Intrinsic::vector_reduce_umin)
1469 .Case("fmax", Intrinsic::vector_reduce_fmax)
1470 .Case("fmin", Intrinsic::vector_reduce_fmin)
1472
1473 bool V2 = false;
1475 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1476 Groups.clear();
1477 V2 = true;
1478 if (R2.match(Name, &Groups))
1480 .Case("fadd", Intrinsic::vector_reduce_fadd)
1481 .Case("fmul", Intrinsic::vector_reduce_fmul)
1483 }
1485 rename(F);
1486 auto Args = F->getFunctionType()->params();
1487 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1488 {Args[V2 ? 1 : 0]});
1489 return true;
1490 }
1491 break; // No other 'experimental.vector.reduce.*'.
1492 }
1493
1494 if (Name.consume_front("splice"))
1495 return true;
1496 break; // No other 'experimental.vector.*'.
1497 }
1498 if (Name.consume_front("experimental.stepvector.")) {
1499 Intrinsic::ID ID = Intrinsic::stepvector;
1500 rename(F);
1502 F->getParent(), ID, F->getFunctionType()->getReturnType());
1503 return true;
1504 }
1505 break; // No other 'e*'.
1506 case 'f':
1507 if (Name.starts_with("flt.rounds")) {
1508 rename(F);
1509 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1510 Intrinsic::get_rounding);
1511 return true;
1512 }
1513 break;
1514 case 'i':
1515 if (Name.starts_with("invariant.group.barrier")) {
1516 // Rename invariant.group.barrier to launder.invariant.group
1517 auto Args = F->getFunctionType()->params();
1518 Type* ObjectPtr[1] = {Args[0]};
1519 rename(F);
1521 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1522 return true;
1523 }
1524 break;
1525 case 'l':
1526 if ((Name.starts_with("lifetime.start") ||
1527 Name.starts_with("lifetime.end")) &&
1528 F->arg_size() == 2) {
1529 Intrinsic::ID IID = Name.starts_with("lifetime.start")
1530 ? Intrinsic::lifetime_start
1531 : Intrinsic::lifetime_end;
1532 rename(F);
1533 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1534 F->getArg(0)->getType());
1535 return true;
1536 }
1537 break;
1538 case 'm': {
1539 // Upgrade the memory intrinsics (memcpy/memmove/memset) that take an explicit
1540 // alignment parameter so that the alignment is instead embedded as an
1541 // attribute of the pointer args.
1542 if (unsigned ID = StringSwitch<unsigned>(Name)
1543 .StartsWith("memcpy.", Intrinsic::memcpy)
1544 .StartsWith("memmove.", Intrinsic::memmove)
1545 .Default(0)) {
1546 if (F->arg_size() == 5) {
1547 rename(F);
1548 // Get the types of dest, src, and len
1549 ArrayRef<Type *> ParamTypes =
1550 F->getFunctionType()->params().slice(0, 3);
1551 NewFn =
1552 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1553 return true;
1554 }
1555 }
1556 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1557 rename(F);
1558 // Get the types of dest, and len
1559 const auto *FT = F->getFunctionType();
1560 Type *ParamTypes[2] = {
1561 FT->getParamType(0), // Dest
1562 FT->getParamType(2) // len
1563 };
1564 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1565 Intrinsic::memset, ParamTypes);
1566 return true;
1567 }
1568
1569 unsigned MaskedID =
1571 .StartsWith("masked.load", Intrinsic::masked_load)
1572 .StartsWith("masked.gather", Intrinsic::masked_gather)
1573 .StartsWith("masked.store", Intrinsic::masked_store)
1574 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1575 .Default(0);
1576 if (MaskedID && F->arg_size() == 4) {
1577 rename(F);
1578 if (MaskedID == Intrinsic::masked_load ||
1579 MaskedID == Intrinsic::masked_gather) {
1581 F->getParent(), MaskedID,
1582 {F->getReturnType(), F->getArg(0)->getType()});
1583 return true;
1584 }
1586 F->getParent(), MaskedID,
1587 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1588 return true;
1589 }
1590 break;
1591 }
1592 case 'n': {
1593 if (Name.consume_front("nvvm.")) {
1594 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1595 if (F->arg_size() == 1) {
1596 Intrinsic::ID IID =
1598 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1599 .Case("clz.i", Intrinsic::ctlz)
1600 .Case("popc.i", Intrinsic::ctpop)
1602 if (IID != Intrinsic::not_intrinsic) {
1603 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1604 {F->getReturnType()});
1605 return true;
1606 }
1607 } else if (F->arg_size() == 2) {
1608 Intrinsic::ID IID =
1610 .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
1611 .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
1612 .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
1613 .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
1615 if (IID != Intrinsic::not_intrinsic) {
1616 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1617 {F->getReturnType()});
1618 return true;
1619 }
1620 }
1621
1622 // Check for nvvm intrinsics that need a return type adjustment.
1623 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1625 if (IID != Intrinsic::not_intrinsic) {
1626 NewFn = nullptr;
1627 return true;
1628 }
1629 }
1630
1631 // Upgrade Distributed Shared Memory Intrinsics
1633 if (IID != Intrinsic::not_intrinsic) {
1634 rename(F);
1635 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1636 return true;
1637 }
1638
1639 // Upgrade TMA copy G2S Intrinsics
1641 if (IID != Intrinsic::not_intrinsic) {
1642 rename(F);
1643 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1644 return true;
1645 }
1646
1647 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1648 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1649 //
1650 // TODO: We could add lohi.i2d.
1651 bool Expand = false;
1652 if (Name.consume_front("abs."))
1653 // nvvm.abs.{i,ll,bf16,bf16x2}
1654 Expand =
1655 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1656 else if (Name.consume_front("fabs."))
1657 // nvvm.fabs.{f,ftz.f,d}
1658 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1659 else if (Name.consume_front("ex2.approx."))
1660 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1661 Expand =
1662 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1663 else if (Name.consume_front("atomic.load."))
1664 // nvvm.atomic.load.add.{f32,f64}.p
1665 // nvvm.atomic.load.{inc,dec}.32.p
1666 Expand = StringSwitch<bool>(Name)
1667 .StartsWith("add.f32.p", true)
1668 .StartsWith("add.f64.p", true)
1669 .StartsWith("inc.32.p", true)
1670 .StartsWith("dec.32.p", true)
1671 .Default(false);
1672 else if (Name.consume_front("bitcast."))
1673 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1674 Expand =
1675 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1676 else if (Name.consume_front("rotate."))
1677 // nvvm.rotate.{b32,b64,right.b64}
1678 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1679 else if (Name.consume_front("ptr.gen.to."))
1680 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1681 Expand = consumeNVVMPtrAddrSpace(Name);
1682 else if (Name.consume_front("ptr."))
1683 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1684 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1685 else if (Name.consume_front("ldg.global."))
1686 // nvvm.ldg.global.{i,p,f}
1687 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1688 Name.starts_with("p."));
1689 else
1690 Expand = StringSwitch<bool>(Name)
1691 .Case("barrier0", true)
1692 .Case("barrier.n", true)
1693 .Case("barrier.sync.cnt", true)
1694 .Case("barrier.sync", true)
1695 .Case("barrier", true)
1696 .Case("bar.sync", true)
1697 .Case("barrier0.popc", true)
1698 .Case("barrier0.and", true)
1699 .Case("barrier0.or", true)
1700 .Case("clz.ll", true)
1701 .Case("popc.ll", true)
1702 .Case("h2f", true)
1703 .Case("swap.lo.hi.b64", true)
1704 .Case("tanh.approx.f32", true)
1705 .Default(false);
1706
1707 if (Expand) {
1708 NewFn = nullptr;
1709 return true;
1710 }
1711 break; // No other 'nvvm.*'.
1712 }
1713 break;
1714 }
1715 case 'o':
1716 if (Name.starts_with("objectsize.")) {
1717 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1718 if (F->arg_size() == 2 || F->arg_size() == 3) {
1719 rename(F);
1720 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1721 Intrinsic::objectsize, Tys);
1722 return true;
1723 }
1724 }
1725 break;
1726
1727 case 'p':
1728 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1729 rename(F);
1731 F->getParent(), Intrinsic::ptr_annotation,
1732 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1733 return true;
1734 }
1735 break;
1736
1737 case 'r': {
1738 if (Name.consume_front("riscv.")) {
1741 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1742 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1743 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1744 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1747 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1748 rename(F);
1749 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1750 return true;
1751 }
1752 break; // No other applicable upgrades.
1753 }
1754
1756 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1757 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1760 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1761 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1762 rename(F);
1763 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1764 return true;
1765 }
1766 break; // No other applicable upgrades.
1767 }
1768
1770 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1771 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1772 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1773 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1774 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1775 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1778 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1779 rename(F);
1780 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1781 return true;
1782 }
1783 break; // No other applicable upgrades.
1784 }
1785
1786 // Replace llvm.riscv.clmul with llvm.clmul.
1787 if (Name == "clmul.i32" || Name == "clmul.i64") {
1789 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1790 return true;
1791 }
1792
1793 break; // No other 'riscv.*' intrinsics
1794 }
1795 } break;
1796
1797 case 's':
1798 if (Name == "stackprotectorcheck") {
1799 NewFn = nullptr;
1800 return true;
1801 }
1802 break;
1803
1804 case 't':
1805 if (Name == "thread.pointer") {
1807 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1808 return true;
1809 }
1810 break;
1811
1812 case 'v': {
1813 if (Name == "var.annotation" && F->arg_size() == 4) {
1814 rename(F);
1816 F->getParent(), Intrinsic::var_annotation,
1817 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1818 return true;
1819 }
1820 if (Name.consume_front("vector.splice")) {
1821 if (Name.starts_with(".left") || Name.starts_with(".right"))
1822 break;
1823 return true;
1824 }
1825 break;
1826 }
1827
1828 case 'w':
1829 if (Name.consume_front("wasm.")) {
1832 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1833 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1834 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1837 rename(F);
1838 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1839 F->getReturnType());
1840 return true;
1841 }
1842
1843 if (Name.consume_front("dot.i8x16.i7x16.")) {
1845 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1846 .Case("add.signed",
1847 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1850 rename(F);
1851 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1852 return true;
1853 }
1854 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1855 }
1856 break; // No other 'wasm.*'.
1857 }
1858 break;
1859
1860 case 'x':
1861 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1862 return true;
1863 }
1864
1865 auto *ST = dyn_cast<StructType>(F->getReturnType());
1866 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1867 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1868 // Replace return type with literal non-packed struct. Only do this for
1869 // intrinsics declared to return a struct, not for intrinsics with
1870 // overloaded return type, in which case the exact struct type will be
1871 // mangled into the name.
1872 if (Intrinsic::hasStructReturnType(F->getIntrinsicID())) {
1873 FunctionType *FT = F->getFunctionType();
1874 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1875 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1876 std::string Name = F->getName().str();
1877 rename(F);
1878 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1879 Name, F->getParent());
1880
1881 // The new function may also need remangling.
1882 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1883 NewFn = *Result;
1884 return true;
1885 }
1886 }
1887
1888 // Remangle our intrinsic since we upgrade the mangling
1890 if (Result != std::nullopt) {
1891 NewFn = *Result;
1892 return true;
1893 }
1894
1895 // This may not belong here. This function is effectively being overloaded
1896 // to both detect an intrinsic which needs upgrading, and to provide the
1897 // upgraded form of the intrinsic. We should perhaps have two separate
1898 // functions for this.
1899 return false;
1900}
1901
// NOTE(review): the line(s) that open this declaration are missing from this
// extract; only the final parameter is visible below.
// Runs upgradeIntrinsicFunction1 on F and then refreshes the attribute list of
// the resulting intrinsic declaration. Returns what upgradeIntrinsicFunction1
// returned.
1903 bool CanUpgradeDebugIntrinsicsToRecords) {
// Start with "no replacement function".
1904 NewFn = nullptr;
1905 bool Upgraded =
1906 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1907
1908 // Upgrade intrinsic attributes. This does not change the function.
// When a replacement was produced, the attributes are applied to it rather
// than to the old function.
1909 if (NewFn)
1910 F = NewFn;
1911 if (Intrinsic::ID id = F->getIntrinsicID()) {
1912 // Only do this if the intrinsic signature is valid.
// OverloadTys is only an output slot for isSignatureValid; it is not read
// afterwards.
1913 SmallVector<Type *> OverloadTys;
1914 if (Intrinsic::isSignatureValid(id, F->getFunctionType(), OverloadTys))
1915 F->setAttributes(
1916 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1917 }
1918 return Upgraded;
1919}
1920
// NOTE(review): the declaration line and the statements that define ATy and
// STy, plus the third ConstantStruct operand, are missing from this extract.
// Visible behavior: for a global named llvm.global_ctors / llvm.global_dtors
// whose initializer elements are 2-field structs, build a replacement global
// whose elements are 3-field structs; otherwise return nullptr.
1922 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1923 GV->getName() == "llvm.global_dtors")) ||
1924 !GV->hasInitializer())
1925 return nullptr;
1927 if (!ATy)
1928 return nullptr;
// Only the 2-field element layout is upgraded.
1930 if (!STy || STy->getNumElements() != 2)
1931 return nullptr;
1932
1933 LLVMContext &C = GV->getContext();
1934 IRBuilder<> IRB(C);
// New element type: the two existing field types followed by a pointer field.
1935 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1936 IRB.getPtrTy());
1937 Constant *Init = GV->getInitializer();
1938 unsigned N = Init->getNumOperands();
1939 std::vector<Constant *> NewCtors(N);
// Rebuild every entry, carrying over its first two fields.
1940 for (unsigned i = 0; i != N; ++i) {
1941 auto Ctor = cast<Constant>(Init->getOperand(i));
1942 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1943 Ctor->getAggregateElement(1),
1945 }
1946 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1947
// The replacement keeps GV's linkage and name and is created non-constant
// (second argument). NOTE(review): no module is passed here, so presumably the
// caller inserts the result - confirm.
1948 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1949 NewInit, GV->getName());
1950}
1951
1952 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1953 // to byte shuffles.
// NOTE(review): the first line of the declaration is missing from this
// extract; the body uses Builder, Op and Shift.
// Returns Op shifted by Shift bytes within each 16-byte lane, as a value of
// Op's original type.
1955 unsigned Shift) {
1956 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// Each input element is treated as 8 bytes.
1957 unsigned NumElts = ResultTy->getNumElements() * 8;
1958
1959 // Bitcast from a 64-bit element type to a byte element type.
1960 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1961 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1962
1963 // We'll be shuffling in zeroes.
1964 Value *Res = Constant::getNullValue(VecTy);
1965
1966 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1967 // we'll just return the zero vector.
1968 if (Shift < 16) {
// NOTE(review): the index buffer holds 64 entries, so NumElts is assumed to be
// at most 64; nothing here checks that.
1969 int Idxs[64];
1970 // 256/512-bit version is split into 2/4 16-byte lanes.
1971 for (unsigned l = 0; l != NumElts; l += 16)
1972 for (unsigned i = 0; i != 16; ++i) {
// The shuffle below takes (zero vector, Op): an index >= NumElts reads a byte
// of Op, an index < NumElts reads a zero byte.
1973 unsigned Idx = NumElts + i - Shift;
1974 if (Idx < NumElts)
1975 Idx -= NumElts - 16; // end of lane, switch operand.
1976 Idxs[l + i] = Idx + l;
1977 }
1978
1979 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1980 }
1981
1982 // Bitcast back to a 64-bit element type.
1983 return Builder.CreateBitCast(Res, ResultTy, "cast");
1984}
1985
1986 // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1987 // to byte shuffles.
// NOTE(review): the first line of the declaration is missing from this
// extract; the body uses Builder, Op and Shift.
// Same structure as the PSLLDQ helper, but the shuffle operands are
// (Op, zero vector) and bytes move toward lower indices.
1989 unsigned Shift) {
1990 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// Each input element is treated as 8 bytes.
1991 unsigned NumElts = ResultTy->getNumElements() * 8;
1992
1993 // Bitcast from a 64-bit element type to a byte element type.
1994 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1995 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1996
1997 // We'll be shuffling in zeroes.
1998 Value *Res = Constant::getNullValue(VecTy);
1999
2000 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
2001 // we'll just return the zero vector.
2002 if (Shift < 16) {
// NOTE(review): the index buffer holds 64 entries, so NumElts is assumed to be
// at most 64; nothing here checks that.
2003 int Idxs[64];
2004 // 256/512-bit version is split into 2/4 16-byte lanes.
2005 for (unsigned l = 0; l != NumElts; l += 16)
2006 for (unsigned i = 0; i != 16; ++i) {
// An index < NumElts reads a byte of Op; once i + Shift leaves the lane the
// index is moved into the second operand (the zero vector).
2007 unsigned Idx = i + Shift;
2008 if (Idx >= 16)
2009 Idx += NumElts - 16; // end of lane, switch operand.
2010 Idxs[l + i] = Idx + l;
2011 }
2012
2013 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
2014 }
2015
2016 // Bitcast back to a 64-bit element type.
2017 return Builder.CreateBitCast(Res, ResultTy, "cast");
2018}
2019
// Converts an integer mask value into a vector of i1 and, for NumElts <= 4,
// narrows it to the first NumElts elements.
// NOTE(review): the line that starts the MaskTy definition is missing from
// this extract; only its argument list is visible.
2020 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2021 unsigned NumElts) {
2022 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
// The i1 vector has one element per bit of the integer mask type.
2024 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
2025 Mask = Builder.CreateBitCast(Mask, MaskTy);
2026
2027 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2028 // i8 and we need to extract down to the right number of elements.
2029 if (NumElts <= 4) {
2030 int Indices[4];
2031 for (unsigned i = 0; i != NumElts; ++i)
2032 Indices[i] = i;
// Identity shuffle over the first NumElts lanes; both operands are the mask.
2033 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
2034 "extract");
2035 }
2036
2037 return Mask;
2038}
2039
2040static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2041 Value *Op1) {
2042 // If the mask is all ones just emit the first operation.
2043 if (const auto *C = dyn_cast<Constant>(Mask))
2044 if (C->isAllOnesValue())
2045 return Op0;
2046
2047 Mask = getX86MaskVec(Builder, Mask,
2048 cast<FixedVectorType>(Op0->getType())->getNumElements());
2049 return Builder.CreateSelect(Mask, Op0, Op1);
2050}
2051
2052static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2053 Value *Op1) {
2054 // If the mask is all ones just emit the first operation.
2055 if (const auto *C = dyn_cast<Constant>(Mask))
2056 if (C->isAllOnesValue())
2057 return Op0;
2058
2059 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2060 Mask->getType()->getIntegerBitWidth());
2061 Mask = Builder.CreateBitCast(Mask, MaskTy);
2062 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2063 return Builder.CreateSelect(Mask, Op0, Op1);
2064}
2065
2066 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2067 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2068 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// NOTE(review): this extract is missing the first declaration line, the
// statement guarded by `if (ShiftVal >= 32)` and one statement inside the
// `ShiftVal > 16` block; do not read the control flow below as complete.
2070 Value *Op1, Value *Shift,
2071 Value *Passthru, Value *Mask,
2072 bool IsVALIGN) {
// Shift must be a ConstantInt; cast<> asserts otherwise.
2073 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2074
2075 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2076 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2077 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2078 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2079
2080 // Mask the immediate for VALIGN.
2081 if (IsVALIGN)
2082 ShiftVal &= (NumElts - 1);
2083
2084 // If palignr is shifting the pair of vectors more than the size of two
2085 // lanes, emit zero.
2086 if (ShiftVal >= 32)
2088
2089 // If palignr is shifting the pair of input vectors more than one lane,
2090 // but less than two lanes, convert to shifting in zeroes.
2091 if (ShiftVal > 16) {
2092 ShiftVal -= 16;
2093 Op1 = Op0;
2095 }
2096
2097 int Indices[64];
2098 // 256-bit palignr operates on 128-bit lanes so we need to handle that
// The inner loop always writes 16 entries per outer step, even when NumElts is
// below 16; only the first NumElts entries are passed to the shuffle.
2099 for (unsigned l = 0; l < NumElts; l += 16) {
2100 for (unsigned i = 0; i != 16; ++i) {
2101 unsigned Idx = ShiftVal + i;
2102 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2103 Idx += NumElts - 16; // End of lane, switch operand.
2104 Indices[l + i] = Idx + l;
2105 }
2106 }
2107
// Op1 is the first shuffle operand and Op0 the second.
2108 Value *Align = Builder.CreateShuffleVector(
2109 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2110
// Apply the write mask: masked-off lanes come from Passthru.
2111 return emitX86Select(Builder, Mask, Align, Passthru);
2112}
2113
// NOTE(review): the first line of the declaration is missing from this
// extract; the body uses Builder, CI, ZeroMask and IndexForm.
// Picks the x86_avx512_vpermi2var_* intrinsic matching the call's result type,
// emits it, and applies the mask from call argument 3.
2115 bool ZeroMask, bool IndexForm) {
2116 Type *Ty = CI.getType();
2117 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2118 unsigned EltWidth = Ty->getScalarSizeInBits();
2119 bool IsFloat = Ty->isFPOrFPVectorTy();
2120 Intrinsic::ID IID;
// Dispatch on (vector width, element width, float-ness). The 16- and 8-bit
// element cases do not look at IsFloat.
2121 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2122 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2123 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2124 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2125 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2126 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2127 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2128 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2129 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2130 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2131 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2132 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2133 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2134 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2135 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2136 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2137 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2138 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2139 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2140 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2141 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2142 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2143 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2144 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2145 else if (VecWidth == 128 && EltWidth == 16)
2146 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2147 else if (VecWidth == 256 && EltWidth == 16)
2148 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2149 else if (VecWidth == 512 && EltWidth == 16)
2150 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2151 else if (VecWidth == 128 && EltWidth == 8)
2152 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2153 else if (VecWidth == 256 && EltWidth == 8)
2154 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2155 else if (VecWidth == 512 && EltWidth == 8)
2156 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2157 else
2158 llvm_unreachable("Unexpected intrinsic");
2159
2160 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2161 CI.getArgOperand(2) };
2162
2163 // If this isn't index form we need to swap operand 0 and 1.
2164 if (!IndexForm)
2165 std::swap(Args[0], Args[1]);
2166
2167 Value *V = Builder.CreateIntrinsic(IID, Args);
// Masked-off lanes are zero, or call argument 1 bitcast to the result type.
// The passthru reads the call's own argument 1, not the possibly swapped
// Args[1].
2168 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2169 : Builder.CreateBitCast(CI.getArgOperand(1),
2170 Ty);
2171 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2172}
2173
// NOTE(review): the first line of the declaration is missing from this
// extract; the body uses Builder, CI and IID.
// Emits intrinsic IID, overloaded on the call's result type, over the first
// two operands; a 4-argument call additionally gets a masked select.
2175 Intrinsic::ID IID) {
2176 Type *Ty = CI.getType();
2177 Value *Op0 = CI.getOperand(0);
2178 Value *Op1 = CI.getOperand(1);
2179 Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2180
2181 if (CI.arg_size() == 4) { // For masked intrinsics.
// Operand 2 is the passthru vector and operand 3 the mask.
2182 Value *VecSrc = CI.getOperand(2);
2183 Value *Mask = CI.getOperand(3);
2184 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2185 }
2186 return Res;
2187}
2188
// NOTE(review): the first line of the declaration is missing from this
// extract; the body uses Builder, CI and IsRotateRight.
// Expresses a rotate as a funnel shift (fshl/fshr) with the source used for
// both inputs; a 4-argument call additionally gets a masked select.
2190 bool IsRotateRight) {
2191 Type *Ty = CI.getType();
2192 Value *Src = CI.getArgOperand(0);
2193 Value *Amt = CI.getArgOperand(1);
2194
2195 // Amount may be scalar immediate, in which case create a splat vector.
2196 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2197 // we only care about the lowest log2 bits anyway.
2198 if (Amt->getType() != Ty) {
2199 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// Unsigned int-cast (isSigned = false) to the element type before splatting.
2200 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2201 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2202 }
2203
2204 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2205 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2206
2207 if (CI.arg_size() == 4) { // For masked intrinsics.
// Operand 2 is the passthru vector and operand 3 the mask.
2208 Value *VecSrc = CI.getOperand(2);
2209 Value *Mask = CI.getOperand(3);
2210 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2211 }
2212 return Res;
2213}
2214
2215static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2216 bool IsSigned) {
2217 Type *Ty = CI.getType();
2218 Value *LHS = CI.getArgOperand(0);
2219 Value *RHS = CI.getArgOperand(1);
2220
2221 CmpInst::Predicate Pred;
2222 switch (Imm) {
2223 case 0x0:
2224 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2225 break;
2226 case 0x1:
2227 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2228 break;
2229 case 0x2:
2230 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2231 break;
2232 case 0x3:
2233 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2234 break;
2235 case 0x4:
2236 Pred = ICmpInst::ICMP_EQ;
2237 break;
2238 case 0x5:
2239 Pred = ICmpInst::ICMP_NE;
2240 break;
2241 case 0x6:
2242 return Constant::getNullValue(Ty); // FALSE
2243 case 0x7:
2244 return Constant::getAllOnesValue(Ty); // TRUE
2245 default:
2246 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2247 }
2248
2249 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2250 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2251 return Ext;
2252}
2253
// NOTE(review): the first line of the declaration is missing from this
// extract; the body uses Builder, CI, IsShiftRight and ZeroMask.
// Expresses a two-input shift as a funnel shift (fshl/fshr); calls with four
// or more arguments additionally get a masked select.
2255 bool IsShiftRight, bool ZeroMask) {
2256 Type *Ty = CI.getType();
2257 Value *Op0 = CI.getArgOperand(0);
2258 Value *Op1 = CI.getArgOperand(1);
2259 Value *Amt = CI.getArgOperand(2);
2260
// For right shifts the two data operands are exchanged before the fshr.
2261 if (IsShiftRight)
2262 std::swap(Op0, Op1);
2263
2264 // Amount may be scalar immediate, in which case create a splat vector.
2265 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2266 // we only care about the lowest log2 bits anyway.
2267 if (Amt->getType() != Ty) {
2268 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// Unsigned int-cast (isSigned = false) to the element type before splatting.
2269 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2270 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2271 }
2272
2273 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2274 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2275
2276 unsigned NumArgs = CI.arg_size();
2277 if (NumArgs >= 4) { // For masked intrinsics.
// Passthru: argument 3 for 5-argument calls; otherwise zero when ZeroMask is
// set, else the call's own argument 0 (not the possibly swapped Op0).
2278 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2279 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2280 CI.getArgOperand(0);
// The mask is always the last argument.
2281 Value *Mask = CI.getOperand(NumArgs - 1);
2282 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2283 }
2284 return Res;
2285}
2286
// NOTE(review): the first line of the declaration is missing from this
// extract; the body uses Builder, Ptr, Data, Mask and Aligned.
// Emits a plain aligned store when the mask is a constant all-ones value, and
// a masked store otherwise.
2288 Value *Mask, bool Aligned) {
// "Aligned" means alignment equal to the full size of Data in bytes; otherwise
// byte alignment is used.
2289 const Align Alignment =
2290 Aligned
2291 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2292 : Align(1);
2293
2294 // If the mask is all ones just emit a regular store.
2295 if (const auto *C = dyn_cast<Constant>(Mask))
2296 if (C->isAllOnesValue())
2297 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2298
2299 // Convert the mask from an integer type to a vector of i1.
2300 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2301 Mask = getX86MaskVec(Builder, Mask, NumElts);
2302 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2303}
2304
// NOTE(review): the first line of the declaration and one line inside the
// Align(...) expression are missing from this extract; the body uses Builder,
// Ptr, Passthru, Mask and Aligned.
// Emits a plain aligned load when the mask is a constant all-ones value, and a
// masked load with Passthru otherwise.
2306 Value *Passthru, Value *Mask, bool Aligned) {
// The loaded type is taken from Passthru.
2307 Type *ValTy = Passthru->getType();
2308 const Align Alignment =
2309 Aligned
2310 ? Align(
2312 8)
2313 : Align(1);
2314
2315 // If the mask is all ones just emit a regular load.
2316 if (const auto *C = dyn_cast<Constant>(Mask))
2317 if (C->isAllOnesValue())
2318 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2319
2320 // Convert the mask from an integer type to a vector of i1.
2321 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2322 Mask = getX86MaskVec(Builder, Mask, NumElts);
2323 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2324}
2325
2326static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2327 Type *Ty = CI.getType();
2328 Value *Op0 = CI.getArgOperand(0);
2329 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2330 {Op0, Builder.getInt1(false)});
2331 if (CI.arg_size() == 3)
2332 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2333 return Res;
2334}
2335
2336static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2337 Type *Ty = CI.getType();
2338
2339 // Arguments have a vXi32 type so cast to vXi64.
2340 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2341 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2342
2343 if (IsSigned) {
2344 // Shift left then arithmetic shift right.
2345 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2346 LHS = Builder.CreateShl(LHS, ShiftAmt);
2347 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2348 RHS = Builder.CreateShl(RHS, ShiftAmt);
2349 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2350 } else {
2351 // Clear the upper bits.
2352 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2353 LHS = Builder.CreateAnd(LHS, Mask);
2354 RHS = Builder.CreateAnd(RHS, Mask);
2355 }
2356
2357 Value *Res = Builder.CreateMul(LHS, RHS);
2358
2359 if (CI.arg_size() == 4)
2360 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2361
2362 return Res;
2363}
2364
2365 // Apply a mask to a vector of i1's and make sure the result is at least 8 bits wide.
// NOTE(review): the first line of the declaration and the second shuffle
// operand are missing from this extract; the body uses Builder, Vec and Mask.
// Returns the (optionally masked) i1 vector bitcast to an integer of
// max(NumElts, 8) bits.
2367 Value *Mask) {
2368 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
// A null Mask, or a constant all-ones Mask, leaves Vec unchanged.
2369 if (Mask) {
2370 const auto *C = dyn_cast<Constant>(Mask);
2371 if (!C || !C->isAllOnesValue())
2372 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2373 }
2374
// Widen to 8 lanes: lanes 0..NumElts-1 come from Vec; the remaining indices
// are >= NumElts and so refer to the second shuffle operand.
2375 if (NumElts < 8) {
2376 int Indices[8];
2377 for (unsigned i = 0; i != NumElts; ++i)
2378 Indices[i] = i;
2379 for (unsigned i = NumElts; i != 8; ++i)
2380 Indices[i] = NumElts + i % NumElts;
2381 Vec = Builder.CreateShuffleVector(Vec,
2383 Indices);
2384 }
2385 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2386}
2387
// NOTE(review): this extract is missing the first declaration line, the
// statement heads for the CC == 3 and CC == 7 branches (only their i1-vector
// type argument is visible) and the declaration of Pred.
// Builds an i1-vector comparison result for condition code CC and returns it
// masked and packed into an integer by applyX86MaskOn1BitsVec.
2389 unsigned CC, bool Signed) {
2390 Value *Op0 = CI.getArgOperand(0);
2391 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2392
2393 Value *Cmp;
2394 if (CC == 3) {
2396 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2397 } else if (CC == 7) {
2399 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2400 } else {
// Remaining codes map to icmp predicates; Signed selects the signed or
// unsigned form of the ordered ones.
2402 switch (CC) {
2403 default: llvm_unreachable("Unknown condition code");
2404 case 0: Pred = ICmpInst::ICMP_EQ; break;
2405 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2406 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2407 case 4: Pred = ICmpInst::ICMP_NE; break;
2408 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2409 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2410 }
2411 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2412 }
2413
// The mask is the call's last argument.
2414 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2415
2416 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2417}
2418
2419 // Replace a masked intrinsic with an older unmasked intrinsic.
// NOTE(review): the first line of the declaration is missing from this
// extract; the body uses Builder, CI and IID.
// Calls IID on arguments 0 and 1, then selects against argument 2 (passthru)
// under argument 3 (mask).
2421 Intrinsic::ID IID) {
2422 Value *Rep =
2423 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2424 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2425}
2426
// NOTE(review): the declaration line is missing from this extract; the body
// uses Builder and CI.
// Returns A with element 0 replaced by element 0 of B when bit 0 of the mask is
// set, or by element 0 of Src otherwise.
2428 Value* A = CI.getArgOperand(0);
2429 Value* B = CI.getArgOperand(1);
2430 Value* Src = CI.getArgOperand(2);
2431 Value* Mask = CI.getArgOperand(3);
2432
// Only bit 0 of the mask is tested. NOTE(review): APInt(8, 1) implies the mask
// is expected to be 8 bits wide - confirm against callers.
2433 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2434 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2435 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2436 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2437 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2438 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2439}
2440
// NOTE(review): the declaration line is missing from this extract; the body
// uses Builder and CI.
// Converts the integer mask in argument 0 to an i1 vector with one lane per
// result element and sign-extends it to the call's result type.
2442 Value* Op = CI.getArgOperand(0);
2443 Type* ReturnOp = CI.getType();
2444 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2445 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2446 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2447}
2448
2449 // Replace intrinsic with unmasked version and a select.
// NOTE(review): the first line of the declaration is missing from this
// extract; the body uses Name, Builder, CI and Rep.
// Maps a masked intrinsic name plus the call's result type to an unmasked
// intrinsic ID. On a match, stores the replacement value in Rep and returns
// true; returns false when the name matches none of the prefixes below.
2451 CallBase &CI, Value *&Rep) {
2452 Name = Name.substr(12); // Remove avx512.mask.
2453
2454 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2455 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2456 Intrinsic::ID IID;
2457 if (Name.starts_with("max.p")) {
2458 if (VecWidth == 128 && EltWidth == 32)
2459 IID = Intrinsic::x86_sse_max_ps;
2460 else if (VecWidth == 128 && EltWidth == 64)
2461 IID = Intrinsic::x86_sse2_max_pd;
2462 else if (VecWidth == 256 && EltWidth == 32)
2463 IID = Intrinsic::x86_avx_max_ps_256;
2464 else if (VecWidth == 256 && EltWidth == 64)
2465 IID = Intrinsic::x86_avx_max_pd_256;
2466 else
2467 llvm_unreachable("Unexpected intrinsic");
2468 } else if (Name.starts_with("min.p")) {
2469 if (VecWidth == 128 && EltWidth == 32)
2470 IID = Intrinsic::x86_sse_min_ps;
2471 else if (VecWidth == 128 && EltWidth == 64)
2472 IID = Intrinsic::x86_sse2_min_pd;
2473 else if (VecWidth == 256 && EltWidth == 32)
2474 IID = Intrinsic::x86_avx_min_ps_256;
2475 else if (VecWidth == 256 && EltWidth == 64)
2476 IID = Intrinsic::x86_avx_min_pd_256;
2477 else
2478 llvm_unreachable("Unexpected intrinsic");
2479 } else if (Name.starts_with("pshuf.b.")) {
2480 if (VecWidth == 128)
2481 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2482 else if (VecWidth == 256)
2483 IID = Intrinsic::x86_avx2_pshuf_b;
2484 else if (VecWidth == 512)
2485 IID = Intrinsic::x86_avx512_pshuf_b_512;
2486 else
2487 llvm_unreachable("Unexpected intrinsic");
2488 } else if (Name.starts_with("pmul.hr.sw.")) {
2489 if (VecWidth == 128)
2490 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2491 else if (VecWidth == 256)
2492 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2493 else if (VecWidth == 512)
2494 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2495 else
2496 llvm_unreachable("Unexpected intrinsic");
2497 } else if (Name.starts_with("pmulh.w.")) {
2498 if (VecWidth == 128)
2499 IID = Intrinsic::x86_sse2_pmulh_w;
2500 else if (VecWidth == 256)
2501 IID = Intrinsic::x86_avx2_pmulh_w;
2502 else if (VecWidth == 512)
2503 IID = Intrinsic::x86_avx512_pmulh_w_512;
2504 else
2505 llvm_unreachable("Unexpected intrinsic");
2506 } else if (Name.starts_with("pmulhu.w.")) {
2507 if (VecWidth == 128)
2508 IID = Intrinsic::x86_sse2_pmulhu_w;
2509 else if (VecWidth == 256)
2510 IID = Intrinsic::x86_avx2_pmulhu_w;
2511 else if (VecWidth == 512)
2512 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2513 else
2514 llvm_unreachable("Unexpected intrinsic");
2515 } else if (Name.starts_with("pmaddw.d.")) {
2516 if (VecWidth == 128)
2517 IID = Intrinsic::x86_sse2_pmadd_wd;
2518 else if (VecWidth == 256)
2519 IID = Intrinsic::x86_avx2_pmadd_wd;
2520 else if (VecWidth == 512)
2521 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2522 else
2523 llvm_unreachable("Unexpected intrinsic");
2524 } else if (Name.starts_with("pmaddubs.w.")) {
2525 if (VecWidth == 128)
2526 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2527 else if (VecWidth == 256)
2528 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2529 else if (VecWidth == 512)
2530 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2531 else
2532 llvm_unreachable("Unexpected intrinsic");
2533 } else if (Name.starts_with("packsswb.")) {
2534 if (VecWidth == 128)
2535 IID = Intrinsic::x86_sse2_packsswb_128;
2536 else if (VecWidth == 256)
2537 IID = Intrinsic::x86_avx2_packsswb;
2538 else if (VecWidth == 512)
2539 IID = Intrinsic::x86_avx512_packsswb_512;
2540 else
2541 llvm_unreachable("Unexpected intrinsic");
2542 } else if (Name.starts_with("packssdw.")) {
2543 if (VecWidth == 128)
2544 IID = Intrinsic::x86_sse2_packssdw_128;
2545 else if (VecWidth == 256)
2546 IID = Intrinsic::x86_avx2_packssdw;
2547 else if (VecWidth == 512)
2548 IID = Intrinsic::x86_avx512_packssdw_512;
2549 else
2550 llvm_unreachable("Unexpected intrinsic");
2551 } else if (Name.starts_with("packuswb.")) {
2552 if (VecWidth == 128)
2553 IID = Intrinsic::x86_sse2_packuswb_128;
2554 else if (VecWidth == 256)
2555 IID = Intrinsic::x86_avx2_packuswb;
2556 else if (VecWidth == 512)
2557 IID = Intrinsic::x86_avx512_packuswb_512;
2558 else
2559 llvm_unreachable("Unexpected intrinsic");
2560 } else if (Name.starts_with("packusdw.")) {
2561 if (VecWidth == 128)
2562 IID = Intrinsic::x86_sse41_packusdw;
2563 else if (VecWidth == 256)
2564 IID = Intrinsic::x86_avx2_packusdw;
2565 else if (VecWidth == 512)
2566 IID = Intrinsic::x86_avx512_packusdw_512;
2567 else
2568 llvm_unreachable("Unexpected intrinsic");
2569 } else if (Name.starts_with("vpermilvar.")) {
2570 if (VecWidth == 128 && EltWidth == 32)
2571 IID = Intrinsic::x86_avx_vpermilvar_ps;
2572 else if (VecWidth == 128 && EltWidth == 64)
2573 IID = Intrinsic::x86_avx_vpermilvar_pd;
2574 else if (VecWidth == 256 && EltWidth == 32)
2575 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2576 else if (VecWidth == 256 && EltWidth == 64)
2577 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2578 else if (VecWidth == 512 && EltWidth == 32)
2579 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2580 else if (VecWidth == 512 && EltWidth == 64)
2581 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2582 else
2583 llvm_unreachable("Unexpected intrinsic");
2584 } else if (Name == "cvtpd2dq.256") {
2585 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2586 } else if (Name == "cvtpd2ps.256") {
2587 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2588 } else if (Name == "cvttpd2dq.256") {
2589 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2590 } else if (Name == "cvttps2dq.128") {
2591 IID = Intrinsic::x86_sse2_cvttps2dq;
2592 } else if (Name == "cvttps2dq.256") {
2593 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2594 } else if (Name.starts_with("permvar.")) {
2595 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2596 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2597 IID = Intrinsic::x86_avx2_permps;
2598 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2599 IID = Intrinsic::x86_avx2_permd;
2600 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2601 IID = Intrinsic::x86_avx512_permvar_df_256;
2602 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2603 IID = Intrinsic::x86_avx512_permvar_di_256;
2604 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2605 IID = Intrinsic::x86_avx512_permvar_sf_512;
2606 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2607 IID = Intrinsic::x86_avx512_permvar_si_512;
2608 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2609 IID = Intrinsic::x86_avx512_permvar_df_512;
2610 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2611 IID = Intrinsic::x86_avx512_permvar_di_512;
2612 else if (VecWidth == 128 && EltWidth == 16)
2613 IID = Intrinsic::x86_avx512_permvar_hi_128;
2614 else if (VecWidth == 256 && EltWidth == 16)
2615 IID = Intrinsic::x86_avx512_permvar_hi_256;
2616 else if (VecWidth == 512 && EltWidth == 16)
2617 IID = Intrinsic::x86_avx512_permvar_hi_512;
2618 else if (VecWidth == 128 && EltWidth == 8)
2619 IID = Intrinsic::x86_avx512_permvar_qi_128;
2620 else if (VecWidth == 256 && EltWidth == 8)
2621 IID = Intrinsic::x86_avx512_permvar_qi_256;
2622 else if (VecWidth == 512 && EltWidth == 8)
2623 IID = Intrinsic::x86_avx512_permvar_qi_512;
2624 else
2625 llvm_unreachable("Unexpected intrinsic");
2626 } else if (Name.starts_with("dbpsadbw.")) {
2627 if (VecWidth == 128)
2628 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2629 else if (VecWidth == 256)
2630 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2631 else if (VecWidth == 512)
2632 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2633 else
2634 llvm_unreachable("Unexpected intrinsic");
2635 } else if (Name.starts_with("pmultishift.qb.")) {
2636 if (VecWidth == 128)
2637 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2638 else if (VecWidth == 256)
2639 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2640 else if (VecWidth == 512)
2641 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2642 else
2643 llvm_unreachable("Unexpected intrinsic");
2644 } else if (Name.starts_with("conflict.")) {
// Name[9] is the character right after "conflict."; this assumes the name
// continues past the prefix.
2645 if (Name[9] == 'd' && VecWidth == 128)
2646 IID = Intrinsic::x86_avx512_conflict_d_128;
2647 else if (Name[9] == 'd' && VecWidth == 256)
2648 IID = Intrinsic::x86_avx512_conflict_d_256;
2649 else if (Name[9] == 'd' && VecWidth == 512)
2650 IID = Intrinsic::x86_avx512_conflict_d_512;
2651 else if (Name[9] == 'q' && VecWidth == 128)
2652 IID = Intrinsic::x86_avx512_conflict_q_128;
2653 else if (Name[9] == 'q' && VecWidth == 256)
2654 IID = Intrinsic::x86_avx512_conflict_q_256;
2655 else if (Name[9] == 'q' && VecWidth == 512)
2656 IID = Intrinsic::x86_avx512_conflict_q_512;
2657 else
2658 llvm_unreachable("Unexpected intrinsic");
2659 } else if (Name.starts_with("pavg.")) {
// Name[5] is the character right after "pavg."; this assumes the name
// continues past the prefix.
2660 if (Name[5] == 'b' && VecWidth == 128)
2661 IID = Intrinsic::x86_sse2_pavg_b;
2662 else if (Name[5] == 'b' && VecWidth == 256)
2663 IID = Intrinsic::x86_avx2_pavg_b;
2664 else if (Name[5] == 'b' && VecWidth == 512)
2665 IID = Intrinsic::x86_avx512_pavg_b_512;
2666 else if (Name[5] == 'w' && VecWidth == 128)
2667 IID = Intrinsic::x86_sse2_pavg_w;
2668 else if (Name[5] == 'w' && VecWidth == 256)
2669 IID = Intrinsic::x86_avx2_pavg_w;
2670 else if (Name[5] == 'w' && VecWidth == 512)
2671 IID = Intrinsic::x86_avx512_pavg_w_512;
2672 else
2673 llvm_unreachable("Unexpected intrinsic");
2674 } else
2675 return false;
2676
// Call the unmasked intrinsic with every argument except the last two, then
// select against the second-to-last argument (passthru) under the last
// argument (mask).
2677 SmallVector<Value *, 4> Args(CI.args());
2678 Args.pop_back();
2679 Args.pop_back();
2680 Rep = Builder.CreateIntrinsic(IID, Args);
2681 unsigned NumArgs = CI.arg_size();
2682 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2683 CI.getArgOperand(NumArgs - 2));
2684 return true;
2685}
2686
/// Upgrade the comment marker in an inline-asm string that represents an objc
/// retain/release marker.
///
/// When \p AsmStr begins with "mov\tfp", mentions
/// "objc_retainAutoreleaseReturnValue" and contains "# marker", the '#' of the
/// first "# marker" is replaced by ';'. Any other string is left untouched.
/// \p AsmStr must not be null.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  static const char Prefix[] = "mov\tfp";

  // The string must start with the prefix (sizeof counts the trailing NUL).
  if (AsmStr->compare(0, sizeof(Prefix) - 1, Prefix) != 0)
    return;

  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t MarkerPos = AsmStr->find("# marker");
  if (MarkerPos == std::string::npos)
    return;

  // Swap the single '#' character for ';'.
  AsmStr->replace(MarkerPos, 1, ";");
}
2697
2699 Function *F, IRBuilder<> &Builder) {
2700 Value *Rep = nullptr;
2701
2702 if (Name == "abs.i" || Name == "abs.ll") {
2703 Value *Arg = CI->getArgOperand(0);
2704 Rep = Builder.CreateIntrinsic(Intrinsic::abs, {Arg->getType()},
2705 {Arg, Builder.getTrue()},
2706 /*FMFSource=*/nullptr, "abs");
2707 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2708 Type *Ty = (Name == "abs.bf16")
2709 ? Builder.getBFloatTy()
2710 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2711 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2712 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2713 Rep = Builder.CreateBitCast(Abs, CI->getType());
2714 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2715 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2716 : Intrinsic::nvvm_fabs;
2717 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2718 } else if (Name.consume_front("ex2.approx.")) {
2719 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2720 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2721 : Intrinsic::nvvm_ex2_approx;
2722 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2723 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2724 Name.starts_with("atomic.load.add.f64.p")) {
2725 Value *Ptr = CI->getArgOperand(0);
2726 Value *Val = CI->getArgOperand(1);
2727 Rep = Builder.CreateAtomicRMW(
2729 CI->getContext().getOrInsertSyncScopeID("device"));
2730 // The default scope for atomic.load.* intrinsics is device
2731 // (= gpu scope in ptx), but the default LLVM atomic scope is
2732 // "system"
2733 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2734 Name.starts_with("atomic.load.dec.32.p")) {
2735 Value *Ptr = CI->getArgOperand(0);
2736 Value *Val = CI->getArgOperand(1);
2737 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2739 Rep = Builder.CreateAtomicRMW(
2741 CI->getContext().getOrInsertSyncScopeID("device"));
2742 // See comment above.
2743 } else if (Name == "clz.ll") {
2744 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2745 Value *Arg = CI->getArgOperand(0);
2746 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2747 {Arg, Builder.getFalse()},
2748 /*FMFSource=*/nullptr, "ctlz");
2749 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2750 } else if (Name == "popc.ll") {
2751 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2752 // i64.
2753 Value *Arg = CI->getArgOperand(0);
2754 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2755 Arg, /*FMFSource=*/nullptr, "ctpop");
2756 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2757 } else if (Name == "h2f") {
2758 Value *Cast =
2759 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
2760 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2761 } else if (Name.consume_front("bitcast.") &&
2762 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2763 Name == "d2ll")) {
2764 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2765 } else if (Name == "rotate.b32") {
2766 Value *Arg = CI->getOperand(0);
2767 Value *ShiftAmt = CI->getOperand(1);
2768 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2769 {Arg, Arg, ShiftAmt});
2770 } else if (Name == "rotate.b64") {
2771 Type *Int64Ty = Builder.getInt64Ty();
2772 Value *Arg = CI->getOperand(0);
2773 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2774 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2775 {Arg, Arg, ZExtShiftAmt});
2776 } else if (Name == "rotate.right.b64") {
2777 Type *Int64Ty = Builder.getInt64Ty();
2778 Value *Arg = CI->getOperand(0);
2779 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2780 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2781 {Arg, Arg, ZExtShiftAmt});
2782 } else if (Name == "swap.lo.hi.b64") {
2783 Type *Int64Ty = Builder.getInt64Ty();
2784 Value *Arg = CI->getOperand(0);
2785 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2786 {Arg, Arg, Builder.getInt64(32)});
2787 } else if ((Name.consume_front("ptr.gen.to.") &&
2788 consumeNVVMPtrAddrSpace(Name)) ||
2789 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2790 Name.starts_with(".to.gen"))) {
2791 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2792 } else if (Name.consume_front("ldg.global")) {
2793 Value *Ptr = CI->getArgOperand(0);
2794 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2795 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2796 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2797 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2798 MDNode *MD = MDNode::get(Builder.getContext(), {});
2799 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2800 return LD;
2801 } else if (Name == "tanh.approx.f32") {
2802 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2803 FastMathFlags FMF;
2804 FMF.setApproxFunc();
2805 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2806 FMF);
2807 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2808 Value *Arg =
2809 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2810 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2811 {}, {Arg});
2812 } else if (Name == "barrier") {
2813 Rep = Builder.CreateIntrinsic(
2814 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2815 {CI->getArgOperand(0), CI->getArgOperand(1)});
2816 } else if (Name == "barrier.sync") {
2817 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2818 {CI->getArgOperand(0)});
2819 } else if (Name == "barrier.sync.cnt") {
2820 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2821 {CI->getArgOperand(0), CI->getArgOperand(1)});
2822 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2823 Name == "barrier0.or") {
2824 Value *C = CI->getArgOperand(0);
2825 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2826
2827 Intrinsic::ID IID =
2829 .Case("barrier0.popc",
2830 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2831 .Case("barrier0.and",
2832 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2833 .Case("barrier0.or",
2834 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2835 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2836 Rep = Builder.CreateZExt(Bar, CI->getType());
2837 } else {
2839 if (IID != Intrinsic::not_intrinsic &&
2840 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2841 rename(F);
2842 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2844 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2845 Value *Arg = CI->getArgOperand(I);
2846 Type *OldType = Arg->getType();
2847 Type *NewType = NewFn->getArg(I)->getType();
2848 Args.push_back(
2849 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2850 ? Builder.CreateBitCast(Arg, NewType)
2851 : Arg);
2852 }
2853 Rep = Builder.CreateCall(NewFn, Args);
2854 if (F->getReturnType()->isIntegerTy())
2855 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2856 }
2857 }
2858
2859 return Rep;
2860}
2861
2863 IRBuilder<> &Builder) {
2864 LLVMContext &C = F->getContext();
2865 Value *Rep = nullptr;
2866
2867 if (Name.starts_with("sse4a.movnt.")) {
2869 Elts.push_back(
2870 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2871 MDNode *Node = MDNode::get(C, Elts);
2872
2873 Value *Arg0 = CI->getArgOperand(0);
2874 Value *Arg1 = CI->getArgOperand(1);
2875
2876 // Nontemporal (unaligned) store of the 0'th element of the float/double
2877 // vector.
2878 Value *Extract =
2879 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2880
2881 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2882 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2883 } else if (Name.starts_with("avx.movnt.") ||
2884 Name.starts_with("avx512.storent.")) {
2886 Elts.push_back(
2887 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2888 MDNode *Node = MDNode::get(C, Elts);
2889
2890 Value *Arg0 = CI->getArgOperand(0);
2891 Value *Arg1 = CI->getArgOperand(1);
2892
2893 StoreInst *SI = Builder.CreateAlignedStore(
2894 Arg1, Arg0,
2896 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2897 } else if (Name == "sse2.storel.dq") {
2898 Value *Arg0 = CI->getArgOperand(0);
2899 Value *Arg1 = CI->getArgOperand(1);
2900
2901 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2902 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2903 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2904 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2905 } else if (Name.starts_with("sse.storeu.") ||
2906 Name.starts_with("sse2.storeu.") ||
2907 Name.starts_with("avx.storeu.")) {
2908 Value *Arg0 = CI->getArgOperand(0);
2909 Value *Arg1 = CI->getArgOperand(1);
2910 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2911 } else if (Name == "avx512.mask.store.ss") {
2912 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2913 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2914 Mask, false);
2915 } else if (Name.starts_with("avx512.mask.store")) {
2916 // "avx512.mask.storeu." or "avx512.mask.store."
2917 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2918 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2919 CI->getArgOperand(2), Aligned);
2920 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2921 // Upgrade packed integer vector compare intrinsics to compare instructions.
2922 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2923 bool CmpEq = Name[9] == 'e';
2924 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2925 CI->getArgOperand(0), CI->getArgOperand(1));
2926 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2927 } else if (Name.starts_with("avx512.broadcastm")) {
2928 Type *ExtTy = Type::getInt32Ty(C);
2929 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2930 ExtTy = Type::getInt64Ty(C);
2931 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2932 ExtTy->getPrimitiveSizeInBits();
2933 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2934 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2935 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2936 Value *Vec = CI->getArgOperand(0);
2937 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2938 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2939 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2940 } else if (Name.starts_with("avx.sqrt.p") ||
2941 Name.starts_with("sse2.sqrt.p") ||
2942 Name.starts_with("sse.sqrt.p")) {
2943 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2944 {CI->getArgOperand(0)});
2945 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2946 if (CI->arg_size() == 4 &&
2947 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2948 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2949 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2950 : Intrinsic::x86_avx512_sqrt_pd_512;
2951
2952 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2953 Rep = Builder.CreateIntrinsic(IID, Args);
2954 } else {
2955 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2956 {CI->getArgOperand(0)});
2957 }
2958 Rep =
2959 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2960 } else if (Name.starts_with("avx512.ptestm") ||
2961 Name.starts_with("avx512.ptestnm")) {
2962 Value *Op0 = CI->getArgOperand(0);
2963 Value *Op1 = CI->getArgOperand(1);
2964 Value *Mask = CI->getArgOperand(2);
2965 Rep = Builder.CreateAnd(Op0, Op1);
2966 llvm::Type *Ty = Op0->getType();
2968 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2971 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2972 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2973 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2974 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2975 ->getNumElements();
2976 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2977 Rep =
2978 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2979 } else if (Name.starts_with("avx512.kunpck")) {
2980 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2981 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2982 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2983 int Indices[64];
2984 for (unsigned i = 0; i != NumElts; ++i)
2985 Indices[i] = i;
2986
2987 // First extract half of each vector. This gives better codegen than
2988 // doing it in a single shuffle.
2989 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2990 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2991 // Concat the vectors.
2992 // NOTE: Operands have to be swapped to match intrinsic definition.
2993 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2994 Rep = Builder.CreateBitCast(Rep, CI->getType());
2995 } else if (Name == "avx512.kand.w") {
2996 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2997 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2998 Rep = Builder.CreateAnd(LHS, RHS);
2999 Rep = Builder.CreateBitCast(Rep, CI->getType());
3000 } else if (Name == "avx512.kandn.w") {
3001 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3002 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3003 LHS = Builder.CreateNot(LHS);
3004 Rep = Builder.CreateAnd(LHS, RHS);
3005 Rep = Builder.CreateBitCast(Rep, CI->getType());
3006 } else if (Name == "avx512.kor.w") {
3007 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3008 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3009 Rep = Builder.CreateOr(LHS, RHS);
3010 Rep = Builder.CreateBitCast(Rep, CI->getType());
3011 } else if (Name == "avx512.kxor.w") {
3012 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3013 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3014 Rep = Builder.CreateXor(LHS, RHS);
3015 Rep = Builder.CreateBitCast(Rep, CI->getType());
3016 } else if (Name == "avx512.kxnor.w") {
3017 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3018 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3019 LHS = Builder.CreateNot(LHS);
3020 Rep = Builder.CreateXor(LHS, RHS);
3021 Rep = Builder.CreateBitCast(Rep, CI->getType());
3022 } else if (Name == "avx512.knot.w") {
3023 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3024 Rep = Builder.CreateNot(Rep);
3025 Rep = Builder.CreateBitCast(Rep, CI->getType());
3026 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3027 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3028 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3029 Rep = Builder.CreateOr(LHS, RHS);
3030 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3031 Value *C;
3032 if (Name[14] == 'c')
3033 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
3034 else
3035 C = ConstantInt::getNullValue(Builder.getInt16Ty());
3036 Rep = Builder.CreateICmpEQ(Rep, C);
3037 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3038 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3039 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3040 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3041 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3042 Type *I32Ty = Type::getInt32Ty(C);
3043 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
3044 ConstantInt::get(I32Ty, 0));
3045 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
3046 ConstantInt::get(I32Ty, 0));
3047 Value *EltOp;
3048 if (Name.contains(".add."))
3049 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3050 else if (Name.contains(".sub."))
3051 EltOp = Builder.CreateFSub(Elt0, Elt1);
3052 else if (Name.contains(".mul."))
3053 EltOp = Builder.CreateFMul(Elt0, Elt1);
3054 else
3055 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3056 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3057 ConstantInt::get(I32Ty, 0));
3058 } else if (Name.starts_with("avx512.mask.pcmp")) {
3059 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3060 bool CmpEq = Name[16] == 'e';
3061 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3062 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3063 Type *OpTy = CI->getArgOperand(0)->getType();
3064 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3065 Intrinsic::ID IID;
3066 switch (VecWidth) {
3067 default:
3068 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3069 break;
3070 case 128:
3071 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3072 break;
3073 case 256:
3074 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3075 break;
3076 case 512:
3077 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3078 break;
3079 }
3080
3081 Rep =
3082 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3083 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3084 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3085 Type *OpTy = CI->getArgOperand(0)->getType();
3086 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3087 unsigned EltWidth = OpTy->getScalarSizeInBits();
3088 Intrinsic::ID IID;
3089 if (VecWidth == 128 && EltWidth == 32)
3090 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3091 else if (VecWidth == 256 && EltWidth == 32)
3092 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3093 else if (VecWidth == 512 && EltWidth == 32)
3094 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3095 else if (VecWidth == 128 && EltWidth == 64)
3096 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3097 else if (VecWidth == 256 && EltWidth == 64)
3098 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3099 else if (VecWidth == 512 && EltWidth == 64)
3100 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3101 else
3102 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3103
3104 Rep =
3105 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3106 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3107 } else if (Name.starts_with("avx512.cmp.p")) {
3108 SmallVector<Value *, 4> Args(CI->args());
3109 Type *OpTy = Args[0]->getType();
3110 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3111 unsigned EltWidth = OpTy->getScalarSizeInBits();
3112 Intrinsic::ID IID;
3113 if (VecWidth == 128 && EltWidth == 32)
3114 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3115 else if (VecWidth == 256 && EltWidth == 32)
3116 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3117 else if (VecWidth == 512 && EltWidth == 32)
3118 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3119 else if (VecWidth == 128 && EltWidth == 64)
3120 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3121 else if (VecWidth == 256 && EltWidth == 64)
3122 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3123 else if (VecWidth == 512 && EltWidth == 64)
3124 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3125 else
3126 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3127
3129 if (VecWidth == 512)
3130 std::swap(Mask, Args.back());
3131 Args.push_back(Mask);
3132
3133 Rep = Builder.CreateIntrinsic(IID, Args);
3134 } else if (Name.starts_with("avx512.mask.cmp.")) {
3135 // Integer compare intrinsics.
3136 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3137 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3138 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3139 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3140 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3141 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3142 Name.starts_with("avx512.cvtw2mask.") ||
3143 Name.starts_with("avx512.cvtd2mask.") ||
3144 Name.starts_with("avx512.cvtq2mask.")) {
3145 Value *Op = CI->getArgOperand(0);
3146 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3147 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3148 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3149 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3150 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3151 Name.starts_with("avx512.mask.pabs")) {
3152 Rep = upgradeAbs(Builder, *CI);
3153 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3154 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3155 Name.starts_with("avx512.mask.pmaxs")) {
3156 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3157 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3158 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3159 Name.starts_with("avx512.mask.pmaxu")) {
3160 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3161 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3162 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3163 Name.starts_with("avx512.mask.pmins")) {
3164 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3165 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3166 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3167 Name.starts_with("avx512.mask.pminu")) {
3168 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3169 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3170 Name == "avx512.pmulu.dq.512" ||
3171 Name.starts_with("avx512.mask.pmulu.dq.")) {
3172 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3173 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3174 Name == "avx512.pmul.dq.512" ||
3175 Name.starts_with("avx512.mask.pmul.dq.")) {
3176 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3177 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3178 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3179 Rep =
3180 Builder.CreateSIToFP(CI->getArgOperand(1),
3181 cast<VectorType>(CI->getType())->getElementType());
3182 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3183 } else if (Name == "avx512.cvtusi2sd") {
3184 Rep =
3185 Builder.CreateUIToFP(CI->getArgOperand(1),
3186 cast<VectorType>(CI->getType())->getElementType());
3187 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3188 } else if (Name == "sse2.cvtss2sd") {
3189 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3190 Rep = Builder.CreateFPExt(
3191 Rep, cast<VectorType>(CI->getType())->getElementType());
3192 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3193 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3194 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3195 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3196 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3197 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3198 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3199 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3200 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3201 Name == "avx512.mask.cvtqq2ps.256" ||
3202 Name == "avx512.mask.cvtqq2ps.512" ||
3203 Name == "avx512.mask.cvtuqq2ps.256" ||
3204 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3205 Name == "avx.cvt.ps2.pd.256" ||
3206 Name == "avx512.mask.cvtps2pd.128" ||
3207 Name == "avx512.mask.cvtps2pd.256") {
3208 auto *DstTy = cast<FixedVectorType>(CI->getType());
3209 Rep = CI->getArgOperand(0);
3210 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3211
3212 unsigned NumDstElts = DstTy->getNumElements();
3213 if (NumDstElts < SrcTy->getNumElements()) {
3214 assert(NumDstElts == 2 && "Unexpected vector size");
3215 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3216 }
3217
3218 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3219 bool IsUnsigned = Name.contains("cvtu");
3220 if (IsPS2PD)
3221 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3222 else if (CI->arg_size() == 4 &&
3223 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3224 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3225 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3226 : Intrinsic::x86_avx512_sitofp_round;
3227 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3228 {Rep, CI->getArgOperand(3)});
3229 } else {
3230 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3231 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3232 }
3233
3234 if (CI->arg_size() >= 3)
3235 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3236 CI->getArgOperand(1));
3237 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3238 Name.starts_with("vcvtph2ps.")) {
3239 auto *DstTy = cast<FixedVectorType>(CI->getType());
3240 Rep = CI->getArgOperand(0);
3241 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3242 unsigned NumDstElts = DstTy->getNumElements();
3243 if (NumDstElts != SrcTy->getNumElements()) {
3244 assert(NumDstElts == 4 && "Unexpected vector size");
3245 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3246 }
3247 Rep = Builder.CreateBitCast(
3248 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3249 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3250 if (CI->arg_size() >= 3)
3251 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3252 CI->getArgOperand(1));
3253 } else if (Name.starts_with("avx512.mask.load")) {
3254 // "avx512.mask.loadu." or "avx512.mask.load."
3255 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3256 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3257 CI->getArgOperand(2), Aligned);
3258 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3259 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3260 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3261 ResultTy->getNumElements());
3262
3263 Rep = Builder.CreateIntrinsic(
3264 Intrinsic::masked_expandload, ResultTy,
3265 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3266 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3267 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3268 Value *MaskVec =
3269 getX86MaskVec(Builder, CI->getArgOperand(2),
3270 cast<FixedVectorType>(ResultTy)->getNumElements());
3271
3272 Rep = Builder.CreateIntrinsic(
3273 Intrinsic::masked_compressstore, ResultTy,
3274 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3275 } else if (Name.starts_with("avx512.mask.compress.") ||
3276 Name.starts_with("avx512.mask.expand.")) {
3277 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3278
3279 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3280 ResultTy->getNumElements());
3281
3282 bool IsCompress = Name[12] == 'c';
3283 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3284 : Intrinsic::x86_avx512_mask_expand;
3285 Rep = Builder.CreateIntrinsic(
3286 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3287 } else if (Name.starts_with("xop.vpcom")) {
3288 bool IsSigned;
3289 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3290 Name.ends_with("uq"))
3291 IsSigned = false;
3292 else if (Name.ends_with("b") || Name.ends_with("w") ||
3293 Name.ends_with("d") || Name.ends_with("q"))
3294 IsSigned = true;
3295 else
3296 reportFatalUsageErrorWithCI("Intrinsic has unknown suffix", CI);
3297
3298 unsigned Imm;
3299 if (CI->arg_size() == 3) {
3300 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3301 } else {
3302 Name = Name.substr(9); // strip off "xop.vpcom"
3303 if (Name.starts_with("lt"))
3304 Imm = 0;
3305 else if (Name.starts_with("le"))
3306 Imm = 1;
3307 else if (Name.starts_with("gt"))
3308 Imm = 2;
3309 else if (Name.starts_with("ge"))
3310 Imm = 3;
3311 else if (Name.starts_with("eq"))
3312 Imm = 4;
3313 else if (Name.starts_with("ne"))
3314 Imm = 5;
3315 else if (Name.starts_with("false"))
3316 Imm = 6;
3317 else if (Name.starts_with("true"))
3318 Imm = 7;
3319 else
3320 llvm_unreachable("Unknown condition");
3321 }
3322
3323 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3324 } else if (Name.starts_with("xop.vpcmov")) {
3325 Value *Sel = CI->getArgOperand(2);
3326 Value *NotSel = Builder.CreateNot(Sel);
3327 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3328 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3329 Rep = Builder.CreateOr(Sel0, Sel1);
3330 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3331 Name.starts_with("avx512.mask.prol")) {
3332 Rep = upgradeX86Rotate(Builder, *CI, false);
3333 } else if (Name.starts_with("avx512.pror") ||
3334 Name.starts_with("avx512.mask.pror")) {
3335 Rep = upgradeX86Rotate(Builder, *CI, true);
3336 } else if (Name.starts_with("avx512.vpshld.") ||
3337 Name.starts_with("avx512.mask.vpshld") ||
3338 Name.starts_with("avx512.maskz.vpshld")) {
3339 bool ZeroMask = Name[11] == 'z';
3340 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3341 } else if (Name.starts_with("avx512.vpshrd.") ||
3342 Name.starts_with("avx512.mask.vpshrd") ||
3343 Name.starts_with("avx512.maskz.vpshrd")) {
3344 bool ZeroMask = Name[11] == 'z';
3345 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3346 } else if (Name == "sse42.crc32.64.8") {
3347 Value *Trunc0 =
3348 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3349 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3350 {Trunc0, CI->getArgOperand(1)});
3351 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3352 } else if (Name.starts_with("avx.vbroadcast.s") ||
3353 Name.starts_with("avx512.vbroadcast.s")) {
3354 // Replace broadcasts with a series of insertelements.
3355 auto *VecTy = cast<FixedVectorType>(CI->getType());
3356 Type *EltTy = VecTy->getElementType();
3357 unsigned EltNum = VecTy->getNumElements();
3358 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3359 Type *I32Ty = Type::getInt32Ty(C);
3360 Rep = PoisonValue::get(VecTy);
3361 for (unsigned I = 0; I < EltNum; ++I)
3362 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3363 } else if (Name.starts_with("sse41.pmovsx") ||
3364 Name.starts_with("sse41.pmovzx") ||
3365 Name.starts_with("avx2.pmovsx") ||
3366 Name.starts_with("avx2.pmovzx") ||
3367 Name.starts_with("avx512.mask.pmovsx") ||
3368 Name.starts_with("avx512.mask.pmovzx")) {
3369 auto *DstTy = cast<FixedVectorType>(CI->getType());
3370 unsigned NumDstElts = DstTy->getNumElements();
3371
3372 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3373 SmallVector<int, 8> ShuffleMask(NumDstElts);
3374 for (unsigned i = 0; i != NumDstElts; ++i)
3375 ShuffleMask[i] = i;
3376
3377 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3378
3379 bool DoSext = Name.contains("pmovsx");
3380 Rep =
3381 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3382 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3383 if (CI->arg_size() == 3)
3384 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3385 CI->getArgOperand(1));
3386 } else if (Name == "avx512.mask.pmov.qd.256" ||
3387 Name == "avx512.mask.pmov.qd.512" ||
3388 Name == "avx512.mask.pmov.wb.256" ||
3389 Name == "avx512.mask.pmov.wb.512") {
3390 Type *Ty = CI->getArgOperand(1)->getType();
3391 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3392 Rep =
3393 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3394 } else if (Name.starts_with("avx.vbroadcastf128") ||
3395 Name == "avx2.vbroadcasti128") {
3396 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3397 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3398 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3399 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3400 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3401 if (NumSrcElts == 2)
3402 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3403 else
3404 Rep = Builder.CreateShuffleVector(Load,
3405 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3406 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3407 Name.starts_with("avx512.mask.shuf.f")) {
3408 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3409 Type *VT = CI->getType();
3410 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3411 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3412 unsigned ControlBitsMask = NumLanes - 1;
3413 unsigned NumControlBits = NumLanes / 2;
3414 SmallVector<int, 8> ShuffleMask(0);
3415
3416 for (unsigned l = 0; l != NumLanes; ++l) {
3417 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3418 // We actually need the other source.
3419 if (l >= NumLanes / 2)
3420 LaneMask += NumLanes;
3421 for (unsigned i = 0; i != NumElementsInLane; ++i)
3422 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3423 }
3424 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3425 CI->getArgOperand(1), ShuffleMask);
3426 Rep =
3427 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3428 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3429 Name.starts_with("avx512.mask.broadcasti")) {
3430 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3431 ->getNumElements();
3432 unsigned NumDstElts =
3433 cast<FixedVectorType>(CI->getType())->getNumElements();
3434
3435 SmallVector<int, 8> ShuffleMask(NumDstElts);
3436 for (unsigned i = 0; i != NumDstElts; ++i)
3437 ShuffleMask[i] = i % NumSrcElts;
3438
3439 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3440 CI->getArgOperand(0), ShuffleMask);
3441 Rep =
3442 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3443 } else if (Name.starts_with("avx2.pbroadcast") ||
3444 Name.starts_with("avx2.vbroadcast") ||
3445 Name.starts_with("avx512.pbroadcast") ||
3446 Name.starts_with("avx512.mask.broadcast.s")) {
3447 // Replace vp?broadcasts with a vector shuffle.
3448 Value *Op = CI->getArgOperand(0);
3449 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3450 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3453 Rep = Builder.CreateShuffleVector(Op, M);
3454
3455 if (CI->arg_size() == 3)
3456 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3457 CI->getArgOperand(1));
3458 } else if (Name.starts_with("sse2.padds.") ||
3459 Name.starts_with("avx2.padds.") ||
3460 Name.starts_with("avx512.padds.") ||
3461 Name.starts_with("avx512.mask.padds.")) {
3462 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3463 } else if (Name.starts_with("sse2.psubs.") ||
3464 Name.starts_with("avx2.psubs.") ||
3465 Name.starts_with("avx512.psubs.") ||
3466 Name.starts_with("avx512.mask.psubs.")) {
3467 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3468 } else if (Name.starts_with("sse2.paddus.") ||
3469 Name.starts_with("avx2.paddus.") ||
3470 Name.starts_with("avx512.mask.paddus.")) {
3471 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3472 } else if (Name.starts_with("sse2.psubus.") ||
3473 Name.starts_with("avx2.psubus.") ||
3474 Name.starts_with("avx512.mask.psubus.")) {
3475 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3476 } else if (Name.starts_with("avx512.mask.palignr.")) {
3477 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3478 CI->getArgOperand(1), CI->getArgOperand(2),
3479 CI->getArgOperand(3), CI->getArgOperand(4),
3480 false);
3481 } else if (Name.starts_with("avx512.mask.valign.")) {
3483 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3484 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3485 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3486 // 128/256-bit shift left specified in bits.
3487 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3488 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3489 Shift / 8); // Shift is in bits.
3490 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3491 // 128/256-bit shift right specified in bits.
3492 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3493 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3494 Shift / 8); // Shift is in bits.
3495 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3496 Name == "avx512.psll.dq.512") {
3497 // 128/256/512-bit shift left specified in bytes.
3498 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3499 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3500 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3501 Name == "avx512.psrl.dq.512") {
3502 // 128/256/512-bit shift right specified in bytes.
3503 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3504 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3505 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3506 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3507 Name.starts_with("avx2.pblendd.")) {
3508 Value *Op0 = CI->getArgOperand(0);
3509 Value *Op1 = CI->getArgOperand(1);
3510 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3511 auto *VecTy = cast<FixedVectorType>(CI->getType());
3512 unsigned NumElts = VecTy->getNumElements();
3513
3514 SmallVector<int, 16> Idxs(NumElts);
3515 for (unsigned i = 0; i != NumElts; ++i)
3516 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3517
3518 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3519 } else if (Name.starts_with("avx.vinsertf128.") ||
3520 Name == "avx2.vinserti128" ||
3521 Name.starts_with("avx512.mask.insert")) {
3522 Value *Op0 = CI->getArgOperand(0);
3523 Value *Op1 = CI->getArgOperand(1);
3524 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3525 unsigned DstNumElts =
3526 cast<FixedVectorType>(CI->getType())->getNumElements();
3527 unsigned SrcNumElts =
3528 cast<FixedVectorType>(Op1->getType())->getNumElements();
3529 unsigned Scale = DstNumElts / SrcNumElts;
3530
3531 // Mask off the high bits of the immediate value; hardware ignores those.
3532 Imm = Imm % Scale;
3533
3534 // Extend the second operand into a vector the size of the destination.
3535 SmallVector<int, 8> Idxs(DstNumElts);
3536 for (unsigned i = 0; i != SrcNumElts; ++i)
3537 Idxs[i] = i;
3538 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3539 Idxs[i] = SrcNumElts;
3540 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3541
3542 // Insert the second operand into the first operand.
3543
3544 // Note that there is no guarantee that instruction lowering will actually
3545 // produce a vinsertf128 instruction for the created shuffles. In
3546 // particular, the 0 immediate case involves no lane changes, so it can
3547 // be handled as a blend.
3548
3549 // Example of shuffle mask for 32-bit elements:
3550 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3551 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3552
3553 // First fill with identity mask.
3554 for (unsigned i = 0; i != DstNumElts; ++i)
3555 Idxs[i] = i;
3556 // Then replace the elements where we need to insert.
3557 for (unsigned i = 0; i != SrcNumElts; ++i)
3558 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3559 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3560
3561 // If the intrinsic has a mask operand, handle that.
3562 if (CI->arg_size() == 5)
3563 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3564 CI->getArgOperand(3));
3565 } else if (Name.starts_with("avx.vextractf128.") ||
3566 Name == "avx2.vextracti128" ||
3567 Name.starts_with("avx512.mask.vextract")) {
3568 Value *Op0 = CI->getArgOperand(0);
3569 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3570 unsigned DstNumElts =
3571 cast<FixedVectorType>(CI->getType())->getNumElements();
3572 unsigned SrcNumElts =
3573 cast<FixedVectorType>(Op0->getType())->getNumElements();
3574 unsigned Scale = SrcNumElts / DstNumElts;
3575
3576 // Mask off the high bits of the immediate value; hardware ignores those.
3577 Imm = Imm % Scale;
3578
3579 // Get indexes for the subvector of the input vector.
3580 SmallVector<int, 8> Idxs(DstNumElts);
3581 for (unsigned i = 0; i != DstNumElts; ++i) {
3582 Idxs[i] = i + (Imm * DstNumElts);
3583 }
3584 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3585
3586 // If the intrinsic has a mask operand, handle that.
3587 if (CI->arg_size() == 4)
3588 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3589 CI->getArgOperand(2));
3590 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3591 Name.starts_with("avx512.mask.perm.di.")) {
3592 Value *Op0 = CI->getArgOperand(0);
3593 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3594 auto *VecTy = cast<FixedVectorType>(CI->getType());
3595 unsigned NumElts = VecTy->getNumElements();
3596
3597 SmallVector<int, 8> Idxs(NumElts);
3598 for (unsigned i = 0; i != NumElts; ++i)
3599 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3600
3601 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3602
3603 if (CI->arg_size() == 4)
3604 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3605 CI->getArgOperand(2));
3606 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3607 // The immediate permute control byte looks like this:
3608 // [1:0] - select 128 bits from sources for low half of destination
3609 // [2] - ignore
3610 // [3] - zero low half of destination
3611 // [5:4] - select 128 bits from sources for high half of destination
3612 // [6] - ignore
3613 // [7] - zero high half of destination
3614
3615 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3616
3617 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3618 unsigned HalfSize = NumElts / 2;
3619 SmallVector<int, 8> ShuffleMask(NumElts);
3620
3621 // Determine which operand(s) are actually in use for this instruction.
3622 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3623 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3624
3625 // If needed, replace operands based on zero mask.
3626 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3627 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3628
3629 // Permute low half of result.
3630 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3631 for (unsigned i = 0; i < HalfSize; ++i)
3632 ShuffleMask[i] = StartIndex + i;
3633
3634 // Permute high half of result.
3635 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3636 for (unsigned i = 0; i < HalfSize; ++i)
3637 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3638
3639 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3640
3641 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3642 Name.starts_with("avx512.mask.vpermil.p") ||
3643 Name.starts_with("avx512.mask.pshuf.d.")) {
3644 Value *Op0 = CI->getArgOperand(0);
3645 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3646 auto *VecTy = cast<FixedVectorType>(CI->getType());
3647 unsigned NumElts = VecTy->getNumElements();
3648 // Calculate the size of each index in the immediate.
3649 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3650 unsigned IdxMask = ((1 << IdxSize) - 1);
3651
3652 SmallVector<int, 8> Idxs(NumElts);
3653 // Lookup the bits for this element, wrapping around the immediate every
3654 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3655 // to offset by the first index of each group.
3656 for (unsigned i = 0; i != NumElts; ++i)
3657 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3658
3659 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3660
3661 if (CI->arg_size() == 4)
3662 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3663 CI->getArgOperand(2));
3664 } else if (Name == "sse2.pshufl.w" ||
3665 Name.starts_with("avx512.mask.pshufl.w.")) {
3666 Value *Op0 = CI->getArgOperand(0);
3667 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3668 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3669
3670 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3671 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3672
3673 SmallVector<int, 16> Idxs(NumElts);
3674 for (unsigned l = 0; l != NumElts; l += 8) {
3675 for (unsigned i = 0; i != 4; ++i)
3676 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3677 for (unsigned i = 4; i != 8; ++i)
3678 Idxs[i + l] = i + l;
3679 }
3680
3681 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3682
3683 if (CI->arg_size() == 4)
3684 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3685 CI->getArgOperand(2));
3686 } else if (Name == "sse2.pshufh.w" ||
3687 Name.starts_with("avx512.mask.pshufh.w.")) {
3688 Value *Op0 = CI->getArgOperand(0);
3689 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3690 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3691
3692 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3693 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3694
3695 SmallVector<int, 16> Idxs(NumElts);
3696 for (unsigned l = 0; l != NumElts; l += 8) {
3697 for (unsigned i = 0; i != 4; ++i)
3698 Idxs[i + l] = i + l;
3699 for (unsigned i = 0; i != 4; ++i)
3700 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3701 }
3702
3703 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3704
3705 if (CI->arg_size() == 4)
3706 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3707 CI->getArgOperand(2));
3708 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3709 Value *Op0 = CI->getArgOperand(0);
3710 Value *Op1 = CI->getArgOperand(1);
3711 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3712 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3713
3714 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3715 unsigned HalfLaneElts = NumLaneElts / 2;
3716
3717 SmallVector<int, 16> Idxs(NumElts);
3718 for (unsigned i = 0; i != NumElts; ++i) {
3719 // Base index is the starting element of the lane.
3720 Idxs[i] = i - (i % NumLaneElts);
3721 // If we are halfway through the lane, switch to the other source.
3722 if ((i % NumLaneElts) >= HalfLaneElts)
3723 Idxs[i] += NumElts;
3724 // Now select the specific element by adding HalfLaneElts bits from
3725 // the immediate, wrapping around the immediate every 8 bits.
3726 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3727 }
3728
3729 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3730
3731 Rep =
3732 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3733 } else if (Name.starts_with("avx512.mask.movddup") ||
3734 Name.starts_with("avx512.mask.movshdup") ||
3735 Name.starts_with("avx512.mask.movsldup")) {
3736 Value *Op0 = CI->getArgOperand(0);
3737 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3738 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3739
3740 unsigned Offset = 0;
3741 if (Name.starts_with("avx512.mask.movshdup."))
3742 Offset = 1;
3743
3744 SmallVector<int, 16> Idxs(NumElts);
3745 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3746 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3747 Idxs[i + l + 0] = i + l + Offset;
3748 Idxs[i + l + 1] = i + l + Offset;
3749 }
3750
3751 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3752
3753 Rep =
3754 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3755 } else if (Name.starts_with("avx512.mask.punpckl") ||
3756 Name.starts_with("avx512.mask.unpckl.")) {
3757 Value *Op0 = CI->getArgOperand(0);
3758 Value *Op1 = CI->getArgOperand(1);
3759 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3760 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3761
3762 SmallVector<int, 64> Idxs(NumElts);
3763 for (int l = 0; l != NumElts; l += NumLaneElts)
3764 for (int i = 0; i != NumLaneElts; ++i)
3765 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3766
3767 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3768
3769 Rep =
3770 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3771 } else if (Name.starts_with("avx512.mask.punpckh") ||
3772 Name.starts_with("avx512.mask.unpckh.")) {
3773 Value *Op0 = CI->getArgOperand(0);
3774 Value *Op1 = CI->getArgOperand(1);
3775 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3776 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3777
3778 SmallVector<int, 64> Idxs(NumElts);
3779 for (int l = 0; l != NumElts; l += NumLaneElts)
3780 for (int i = 0; i != NumLaneElts; ++i)
3781 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3782
3783 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3784
3785 Rep =
3786 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3787 } else if (Name.starts_with("avx512.mask.and.") ||
3788 Name.starts_with("avx512.mask.pand.")) {
3789 VectorType *FTy = cast<VectorType>(CI->getType());
3791 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3792 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3793 Rep = Builder.CreateBitCast(Rep, FTy);
3794 Rep =
3795 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3796 } else if (Name.starts_with("avx512.mask.andn.") ||
3797 Name.starts_with("avx512.mask.pandn.")) {
3798 VectorType *FTy = cast<VectorType>(CI->getType());
3800 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3801 Rep = Builder.CreateAnd(Rep,
3802 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3803 Rep = Builder.CreateBitCast(Rep, FTy);
3804 Rep =
3805 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3806 } else if (Name.starts_with("avx512.mask.or.") ||
3807 Name.starts_with("avx512.mask.por.")) {
3808 VectorType *FTy = cast<VectorType>(CI->getType());
3810 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3811 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3812 Rep = Builder.CreateBitCast(Rep, FTy);
3813 Rep =
3814 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3815 } else if (Name.starts_with("avx512.mask.xor.") ||
3816 Name.starts_with("avx512.mask.pxor.")) {
3817 VectorType *FTy = cast<VectorType>(CI->getType());
3819 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3820 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3821 Rep = Builder.CreateBitCast(Rep, FTy);
3822 Rep =
3823 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3824 } else if (Name.starts_with("avx512.mask.padd.")) {
3825 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3826 Rep =
3827 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3828 } else if (Name.starts_with("avx512.mask.psub.")) {
3829 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3830 Rep =
3831 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3832 } else if (Name.starts_with("avx512.mask.pmull.")) {
3833 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3834 Rep =
3835 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3836 } else if (Name.starts_with("avx512.mask.add.p")) {
3837 if (Name.ends_with(".512")) {
3838 Intrinsic::ID IID;
3839 if (Name[17] == 's')
3840 IID = Intrinsic::x86_avx512_add_ps_512;
3841 else
3842 IID = Intrinsic::x86_avx512_add_pd_512;
3843
3844 Rep = Builder.CreateIntrinsic(
3845 IID,
3846 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3847 } else {
3848 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3849 }
3850 Rep =
3851 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3852 } else if (Name.starts_with("avx512.mask.div.p")) {
3853 if (Name.ends_with(".512")) {
3854 Intrinsic::ID IID;
3855 if (Name[17] == 's')
3856 IID = Intrinsic::x86_avx512_div_ps_512;
3857 else
3858 IID = Intrinsic::x86_avx512_div_pd_512;
3859
3860 Rep = Builder.CreateIntrinsic(
3861 IID,
3862 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3863 } else {
3864 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3865 }
3866 Rep =
3867 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3868 } else if (Name.starts_with("avx512.mask.mul.p")) {
3869 if (Name.ends_with(".512")) {
3870 Intrinsic::ID IID;
3871 if (Name[17] == 's')
3872 IID = Intrinsic::x86_avx512_mul_ps_512;
3873 else
3874 IID = Intrinsic::x86_avx512_mul_pd_512;
3875
3876 Rep = Builder.CreateIntrinsic(
3877 IID,
3878 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3879 } else {
3880 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3881 }
3882 Rep =
3883 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3884 } else if (Name.starts_with("avx512.mask.sub.p")) {
3885 if (Name.ends_with(".512")) {
3886 Intrinsic::ID IID;
3887 if (Name[17] == 's')
3888 IID = Intrinsic::x86_avx512_sub_ps_512;
3889 else
3890 IID = Intrinsic::x86_avx512_sub_pd_512;
3891
3892 Rep = Builder.CreateIntrinsic(
3893 IID,
3894 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3895 } else {
3896 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3897 }
3898 Rep =
3899 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3900 } else if ((Name.starts_with("avx512.mask.max.p") ||
3901 Name.starts_with("avx512.mask.min.p")) &&
3902 Name.drop_front(18) == ".512") {
3903 bool IsDouble = Name[17] == 'd';
3904 bool IsMin = Name[13] == 'i';
3905 static const Intrinsic::ID MinMaxTbl[2][2] = {
3906 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3907 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3908 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3909
3910 Rep = Builder.CreateIntrinsic(
3911 IID,
3912 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3913 Rep =
3914 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3915 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3916 Rep =
3917 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3918 {CI->getArgOperand(0), Builder.getInt1(false)});
3919 Rep =
3920 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3921 } else if (Name.starts_with("avx512.mask.psll")) {
3922 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3923 bool IsVariable = Name[16] == 'v';
3924 char Size = Name[16] == '.' ? Name[17]
3925 : Name[17] == '.' ? Name[18]
3926 : Name[18] == '.' ? Name[19]
3927 : Name[20];
3928
3929 Intrinsic::ID IID;
3930 if (IsVariable && Name[17] != '.') {
3931 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3932 IID = Intrinsic::x86_avx2_psllv_q;
3933 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3934 IID = Intrinsic::x86_avx2_psllv_q_256;
3935 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3936 IID = Intrinsic::x86_avx2_psllv_d;
3937 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3938 IID = Intrinsic::x86_avx2_psllv_d_256;
3939 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3940 IID = Intrinsic::x86_avx512_psllv_w_128;
3941 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3942 IID = Intrinsic::x86_avx512_psllv_w_256;
3943 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3944 IID = Intrinsic::x86_avx512_psllv_w_512;
3945 else
3946 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3947 } else if (Name.ends_with(".128")) {
3948 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3949 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3950 : Intrinsic::x86_sse2_psll_d;
3951 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3952 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3953 : Intrinsic::x86_sse2_psll_q;
3954 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3955 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3956 : Intrinsic::x86_sse2_psll_w;
3957 else
3958 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3959 } else if (Name.ends_with(".256")) {
3960 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3961 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3962 : Intrinsic::x86_avx2_psll_d;
3963 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3964 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3965 : Intrinsic::x86_avx2_psll_q;
3966 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3967 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3968 : Intrinsic::x86_avx2_psll_w;
3969 else
3970 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3971 } else {
3972 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3973 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3974 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3975 : Intrinsic::x86_avx512_psll_d_512;
3976 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3977 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3978 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3979 : Intrinsic::x86_avx512_psll_q_512;
3980 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3981 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3982 : Intrinsic::x86_avx512_psll_w_512;
3983 else
3984 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3985 }
3986
3987 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3988 } else if (Name.starts_with("avx512.mask.psrl")) {
3989 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3990 bool IsVariable = Name[16] == 'v';
3991 char Size = Name[16] == '.' ? Name[17]
3992 : Name[17] == '.' ? Name[18]
3993 : Name[18] == '.' ? Name[19]
3994 : Name[20];
3995
3996 Intrinsic::ID IID;
3997 if (IsVariable && Name[17] != '.') {
3998 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3999 IID = Intrinsic::x86_avx2_psrlv_q;
4000 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
4001 IID = Intrinsic::x86_avx2_psrlv_q_256;
4002 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
4003 IID = Intrinsic::x86_avx2_psrlv_d;
4004 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
4005 IID = Intrinsic::x86_avx2_psrlv_d_256;
4006 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
4007 IID = Intrinsic::x86_avx512_psrlv_w_128;
4008 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
4009 IID = Intrinsic::x86_avx512_psrlv_w_256;
4010 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
4011 IID = Intrinsic::x86_avx512_psrlv_w_512;
4012 else
4013 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4014 } else if (Name.ends_with(".128")) {
4015 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
4016 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4017 : Intrinsic::x86_sse2_psrl_d;
4018 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
4019 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4020 : Intrinsic::x86_sse2_psrl_q;
4021 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
4022 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4023 : Intrinsic::x86_sse2_psrl_w;
4024 else
4025 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4026 } else if (Name.ends_with(".256")) {
4027 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4028 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4029 : Intrinsic::x86_avx2_psrl_d;
4030 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4031 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4032 : Intrinsic::x86_avx2_psrl_q;
4033 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4034 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4035 : Intrinsic::x86_avx2_psrl_w;
4036 else
4037 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4038 } else {
4039 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4040 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4041 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4042 : Intrinsic::x86_avx512_psrl_d_512;
4043 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4044 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4045 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4046 : Intrinsic::x86_avx512_psrl_q_512;
4047 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
4048 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4049 : Intrinsic::x86_avx512_psrl_w_512;
4050 else
4051 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4052 }
4053
4054 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4055 } else if (Name.starts_with("avx512.mask.psra")) {
4056 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4057 bool IsVariable = Name[16] == 'v';
4058 char Size = Name[16] == '.' ? Name[17]
4059 : Name[17] == '.' ? Name[18]
4060 : Name[18] == '.' ? Name[19]
4061 : Name[20];
4062
4063 Intrinsic::ID IID;
4064 if (IsVariable && Name[17] != '.') {
4065 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4066 IID = Intrinsic::x86_avx2_psrav_d;
4067 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4068 IID = Intrinsic::x86_avx2_psrav_d_256;
4069 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4070 IID = Intrinsic::x86_avx512_psrav_w_128;
4071 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4072 IID = Intrinsic::x86_avx512_psrav_w_256;
4073 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4074 IID = Intrinsic::x86_avx512_psrav_w_512;
4075 else
4076 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4077 } else if (Name.ends_with(".128")) {
4078 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4079 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4080 : Intrinsic::x86_sse2_psra_d;
4081 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4082 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4083 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4084 : Intrinsic::x86_avx512_psra_q_128;
4085 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4086 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4087 : Intrinsic::x86_sse2_psra_w;
4088 else
4089 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4090 } else if (Name.ends_with(".256")) {
4091 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4092 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4093 : Intrinsic::x86_avx2_psra_d;
4094 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4095 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4096 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4097 : Intrinsic::x86_avx512_psra_q_256;
4098 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4099 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4100 : Intrinsic::x86_avx2_psra_w;
4101 else
4102 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4103 } else {
4104 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4105 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4106 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4107 : Intrinsic::x86_avx512_psra_d_512;
4108 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4109 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4110 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4111 : Intrinsic::x86_avx512_psra_q_512;
4112 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4113 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4114 : Intrinsic::x86_avx512_psra_w_512;
4115 else
4116 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4117 }
4118
4119 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4120 } else if (Name.starts_with("avx512.mask.move.s")) {
4121 Rep = upgradeMaskedMove(Builder, *CI);
4122 } else if (Name.starts_with("avx512.cvtmask2")) {
4123 Rep = upgradeMaskToInt(Builder, *CI);
4124 } else if (Name.ends_with(".movntdqa")) {
4126 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4127
4128 LoadInst *LI = Builder.CreateAlignedLoad(
4129 CI->getType(), CI->getArgOperand(0),
4131 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4132 Rep = LI;
4133 } else if (Name.starts_with("fma.vfmadd.") ||
4134 Name.starts_with("fma.vfmsub.") ||
4135 Name.starts_with("fma.vfnmadd.") ||
4136 Name.starts_with("fma.vfnmsub.")) {
4137 bool NegMul = Name[6] == 'n';
4138 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4139 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4140
4141 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4142 CI->getArgOperand(2)};
4143
4144 if (IsScalar) {
4145 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4146 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4147 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4148 }
4149
4150 if (NegMul && !IsScalar)
4151 Ops[0] = Builder.CreateFNeg(Ops[0]);
4152 if (NegMul && IsScalar)
4153 Ops[1] = Builder.CreateFNeg(Ops[1]);
4154 if (NegAcc)
4155 Ops[2] = Builder.CreateFNeg(Ops[2]);
4156
4157 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4158
4159 if (IsScalar)
4160 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4161 } else if (Name.starts_with("fma4.vfmadd.s")) {
4162 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4163 CI->getArgOperand(2)};
4164
4165 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4166 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4167 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4168
4169 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4170
4171 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4172 Rep, (uint64_t)0);
4173 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4174 Name.starts_with("avx512.maskz.vfmadd.s") ||
4175 Name.starts_with("avx512.mask3.vfmadd.s") ||
4176 Name.starts_with("avx512.mask3.vfmsub.s") ||
4177 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4178 bool IsMask3 = Name[11] == '3';
4179 bool IsMaskZ = Name[11] == 'z';
4180 // Drop the "avx512.mask." to make it easier.
4181 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4182 bool NegMul = Name[2] == 'n';
4183 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4184
4185 Value *A = CI->getArgOperand(0);
4186 Value *B = CI->getArgOperand(1);
4187 Value *C = CI->getArgOperand(2);
4188
4189 if (NegMul && (IsMask3 || IsMaskZ))
4190 A = Builder.CreateFNeg(A);
4191 if (NegMul && !(IsMask3 || IsMaskZ))
4192 B = Builder.CreateFNeg(B);
4193 if (NegAcc)
4194 C = Builder.CreateFNeg(C);
4195
4196 A = Builder.CreateExtractElement(A, (uint64_t)0);
4197 B = Builder.CreateExtractElement(B, (uint64_t)0);
4198 C = Builder.CreateExtractElement(C, (uint64_t)0);
4199
4200 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4201 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4202 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4203
4204 Intrinsic::ID IID;
4205 if (Name.back() == 'd')
4206 IID = Intrinsic::x86_avx512_vfmadd_f64;
4207 else
4208 IID = Intrinsic::x86_avx512_vfmadd_f32;
4209 Rep = Builder.CreateIntrinsic(IID, Ops);
4210 } else {
4211 Rep = Builder.CreateFMA(A, B, C);
4212 }
4213
4214 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4215 : IsMask3 ? C
4216 : A;
4217
4218 // For Mask3 with NegAcc, we need to create a new extractelement that
4219 // avoids the negation above.
4220 if (NegAcc && IsMask3)
4221 PassThru =
4222 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4223
4224 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4225 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4226 (uint64_t)0);
4227 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4228 Name.starts_with("avx512.mask.vfnmadd.p") ||
4229 Name.starts_with("avx512.mask.vfnmsub.p") ||
4230 Name.starts_with("avx512.mask3.vfmadd.p") ||
4231 Name.starts_with("avx512.mask3.vfmsub.p") ||
4232 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4233 Name.starts_with("avx512.maskz.vfmadd.p")) {
4234 bool IsMask3 = Name[11] == '3';
4235 bool IsMaskZ = Name[11] == 'z';
4236 // Drop the "avx512.mask." to make it easier.
4237 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4238 bool NegMul = Name[2] == 'n';
4239 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4240
4241 Value *A = CI->getArgOperand(0);
4242 Value *B = CI->getArgOperand(1);
4243 Value *C = CI->getArgOperand(2);
4244
4245 if (NegMul && (IsMask3 || IsMaskZ))
4246 A = Builder.CreateFNeg(A);
4247 if (NegMul && !(IsMask3 || IsMaskZ))
4248 B = Builder.CreateFNeg(B);
4249 if (NegAcc)
4250 C = Builder.CreateFNeg(C);
4251
4252 if (CI->arg_size() == 5 &&
4253 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4254 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4255 Intrinsic::ID IID;
4256 // Check the character before ".512" in string.
4257 if (Name[Name.size() - 5] == 's')
4258 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4259 else
4260 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4261
4262 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4263 } else {
4264 Rep = Builder.CreateFMA(A, B, C);
4265 }
4266
4267 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4268 : IsMask3 ? CI->getArgOperand(2)
4269 : CI->getArgOperand(0);
4270
4271 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4272 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4273 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4274 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4275 Intrinsic::ID IID;
4276 if (VecWidth == 128 && EltWidth == 32)
4277 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4278 else if (VecWidth == 256 && EltWidth == 32)
4279 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4280 else if (VecWidth == 128 && EltWidth == 64)
4281 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4282 else if (VecWidth == 256 && EltWidth == 64)
4283 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4284 else
4285 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4286
4287 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4288 CI->getArgOperand(2)};
4289 Ops[2] = Builder.CreateFNeg(Ops[2]);
4290 Rep = Builder.CreateIntrinsic(IID, Ops);
4291 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4292 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4293 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4294 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4295 bool IsMask3 = Name[11] == '3';
4296 bool IsMaskZ = Name[11] == 'z';
4297 // Drop the "avx512.mask." to make it easier.
4298 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4299 bool IsSubAdd = Name[3] == 's';
4300 if (CI->arg_size() == 5) {
4301 Intrinsic::ID IID;
4302 // Check the character before ".512" in string.
4303 if (Name[Name.size() - 5] == 's')
4304 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4305 else
4306 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4307
4308 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4309 CI->getArgOperand(2), CI->getArgOperand(4)};
4310 if (IsSubAdd)
4311 Ops[2] = Builder.CreateFNeg(Ops[2]);
4312
4313 Rep = Builder.CreateIntrinsic(IID, Ops);
4314 } else {
4315 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4316
4317 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4318 CI->getArgOperand(2)};
4319
4321 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4322 Value *Odd = Builder.CreateCall(FMA, Ops);
4323 Ops[2] = Builder.CreateFNeg(Ops[2]);
4324 Value *Even = Builder.CreateCall(FMA, Ops);
4325
4326 if (IsSubAdd)
4327 std::swap(Even, Odd);
4328
4329 SmallVector<int, 32> Idxs(NumElts);
4330 for (int i = 0; i != NumElts; ++i)
4331 Idxs[i] = i + (i % 2) * NumElts;
4332
4333 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4334 }
4335
4336 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4337 : IsMask3 ? CI->getArgOperand(2)
4338 : CI->getArgOperand(0);
4339
4340 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4341 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4342 Name.starts_with("avx512.maskz.pternlog.")) {
4343 bool ZeroMask = Name[11] == 'z';
4344 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4345 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4346 Intrinsic::ID IID;
4347 if (VecWidth == 128 && EltWidth == 32)
4348 IID = Intrinsic::x86_avx512_pternlog_d_128;
4349 else if (VecWidth == 256 && EltWidth == 32)
4350 IID = Intrinsic::x86_avx512_pternlog_d_256;
4351 else if (VecWidth == 512 && EltWidth == 32)
4352 IID = Intrinsic::x86_avx512_pternlog_d_512;
4353 else if (VecWidth == 128 && EltWidth == 64)
4354 IID = Intrinsic::x86_avx512_pternlog_q_128;
4355 else if (VecWidth == 256 && EltWidth == 64)
4356 IID = Intrinsic::x86_avx512_pternlog_q_256;
4357 else if (VecWidth == 512 && EltWidth == 64)
4358 IID = Intrinsic::x86_avx512_pternlog_q_512;
4359 else
4360 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4361
4362 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4363 CI->getArgOperand(2), CI->getArgOperand(3)};
4364 Rep = Builder.CreateIntrinsic(IID, Args);
4365 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4366 : CI->getArgOperand(0);
4367 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4368 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4369 Name.starts_with("avx512.maskz.vpmadd52")) {
4370 bool ZeroMask = Name[11] == 'z';
4371 bool High = Name[20] == 'h' || Name[21] == 'h';
4372 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4373 Intrinsic::ID IID;
4374 if (VecWidth == 128 && !High)
4375 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4376 else if (VecWidth == 256 && !High)
4377 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4378 else if (VecWidth == 512 && !High)
4379 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4380 else if (VecWidth == 128 && High)
4381 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4382 else if (VecWidth == 256 && High)
4383 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4384 else if (VecWidth == 512 && High)
4385 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4386 else
4387 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4388
4389 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4390 CI->getArgOperand(2)};
4391 Rep = Builder.CreateIntrinsic(IID, Args);
4392 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4393 : CI->getArgOperand(0);
4394 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4395 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4396 Name.starts_with("avx512.mask.vpermt2var.") ||
4397 Name.starts_with("avx512.maskz.vpermt2var.")) {
4398 bool ZeroMask = Name[11] == 'z';
4399 bool IndexForm = Name[17] == 'i';
4400 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4401 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4402 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4403 Name.starts_with("avx512.mask.vpdpbusds.") ||
4404 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4405 bool ZeroMask = Name[11] == 'z';
4406 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4407 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4408 Intrinsic::ID IID;
4409 if (VecWidth == 128 && !IsSaturating)
4410 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4411 else if (VecWidth == 256 && !IsSaturating)
4412 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4413 else if (VecWidth == 512 && !IsSaturating)
4414 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4415 else if (VecWidth == 128 && IsSaturating)
4416 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4417 else if (VecWidth == 256 && IsSaturating)
4418 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4419 else if (VecWidth == 512 && IsSaturating)
4420 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4421 else
4422 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4423
4424 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4425 CI->getArgOperand(2)};
4426
4427 // Input arguments types were incorrectly set to vectors of i32 before but
4428 // they should be vectors of i8. Insert bit cast when encountering the old
4429 // types
4430 if (Args[1]->getType()->isVectorTy() &&
4431 cast<VectorType>(Args[1]->getType())
4432 ->getElementType()
4433 ->isIntegerTy(32) &&
4434 Args[2]->getType()->isVectorTy() &&
4435 cast<VectorType>(Args[2]->getType())
4436 ->getElementType()
4437 ->isIntegerTy(32)) {
4438 Type *NewArgType = nullptr;
4439 if (VecWidth == 128)
4440 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4441 else if (VecWidth == 256)
4442 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4443 else if (VecWidth == 512)
4444 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4445 else
4446 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4447 CI);
4448
4449 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4450 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4451 }
4452
4453 Rep = Builder.CreateIntrinsic(IID, Args);
4454 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4455 : CI->getArgOperand(0);
4456 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4457 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4458 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4459 Name.starts_with("avx512.mask.vpdpwssds.") ||
4460 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4461 bool ZeroMask = Name[11] == 'z';
4462 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4463 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4464 Intrinsic::ID IID;
4465 if (VecWidth == 128 && !IsSaturating)
4466 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4467 else if (VecWidth == 256 && !IsSaturating)
4468 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4469 else if (VecWidth == 512 && !IsSaturating)
4470 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4471 else if (VecWidth == 128 && IsSaturating)
4472 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4473 else if (VecWidth == 256 && IsSaturating)
4474 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4475 else if (VecWidth == 512 && IsSaturating)
4476 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4477 else
4478 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4479
4480 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4481 CI->getArgOperand(2)};
4482
4483 // Input arguments types were incorrectly set to vectors of i32 before but
4484 // they should be vectors of i16. Insert bit cast when encountering the old
4485 // types
4486 if (Args[1]->getType()->isVectorTy() &&
4487 cast<VectorType>(Args[1]->getType())
4488 ->getElementType()
4489 ->isIntegerTy(32) &&
4490 Args[2]->getType()->isVectorTy() &&
4491 cast<VectorType>(Args[2]->getType())
4492 ->getElementType()
4493 ->isIntegerTy(32)) {
4494 Type *NewArgType = nullptr;
4495 if (VecWidth == 128)
4496 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4497 else if (VecWidth == 256)
4498 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4499 else if (VecWidth == 512)
4500 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4501 else
4502 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4503 CI);
4504
4505 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4506 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4507 }
4508
4509 Rep = Builder.CreateIntrinsic(IID, Args);
4510 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4511 : CI->getArgOperand(0);
4512 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4513 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4514 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4515 Name == "subborrow.u32" || Name == "subborrow.u64") {
4516 Intrinsic::ID IID;
4517 if (Name[0] == 'a' && Name.back() == '2')
4518 IID = Intrinsic::x86_addcarry_32;
4519 else if (Name[0] == 'a' && Name.back() == '4')
4520 IID = Intrinsic::x86_addcarry_64;
4521 else if (Name[0] == 's' && Name.back() == '2')
4522 IID = Intrinsic::x86_subborrow_32;
4523 else if (Name[0] == 's' && Name.back() == '4')
4524 IID = Intrinsic::x86_subborrow_64;
4525 else
4526 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4527
4528 // Make a call with 3 operands.
4529 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4530 CI->getArgOperand(2)};
4531 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4532
4533 // Extract the second result and store it.
4534 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4535 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4536 // Replace the original call result with the first result of the new call.
4537 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4538
4539 CI->replaceAllUsesWith(CF);
4540 Rep = nullptr;
4541 } else if (Name.starts_with("avx512.mask.") &&
4542 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4543 // Rep will be updated by the call in the condition.
4544 } else
4545 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4546
4547 return Rep;
4548}
4549
4551 Function *F, IRBuilder<> &Builder) {
4552 if (Name.starts_with("neon.bfcvt")) {
4553 if (Name.starts_with("neon.bfcvtn2")) {
4554 SmallVector<int, 32> LoMask(4);
4555 std::iota(LoMask.begin(), LoMask.end(), 0);
4556 SmallVector<int, 32> ConcatMask(8);
4557 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4558 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4559 Value *Trunc =
4560 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4561 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4562 } else if (Name.starts_with("neon.bfcvtn")) {
4563 SmallVector<int, 32> ConcatMask(8);
4564 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4565 Type *V4BF16 =
4566 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4567 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4568 dbgs() << "Trunc: " << *Trunc << "\n";
4569 return Builder.CreateShuffleVector(
4570 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4571 } else {
4572 return Builder.CreateFPTrunc(CI->getOperand(0),
4573 Type::getBFloatTy(F->getContext()));
4574 }
4575 } else if (Name.starts_with("sve.fcvt")) {
4576 Intrinsic::ID NewID =
4578 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4579 .Case("sve.fcvtnt.bf16f32",
4580 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4582 if (NewID == Intrinsic::not_intrinsic)
4583 llvm_unreachable("Unhandled Intrinsic!");
4584
4585 SmallVector<Value *, 3> Args(CI->args());
4586
4587 // The original intrinsics incorrectly used a predicate based on the
4588 // smallest element type rather than the largest.
4589 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4590 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4591
4592 if (Args[1]->getType() != BadPredTy)
4593 llvm_unreachable("Unexpected predicate type!");
4594
4595 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4596 BadPredTy, Args[1]);
4597 Args[1] = Builder.CreateIntrinsic(
4598 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4599
4600 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4601 CI->getName());
4602 }
4603
4604 llvm_unreachable("Unhandled Intrinsic!");
4605}
4606
4608 IRBuilder<> &Builder) {
4609 if (Name == "mve.vctp64.old") {
4610 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4611 // correct type.
4612 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4613 CI->getArgOperand(0),
4614 /*FMFSource=*/nullptr, CI->getName());
4615 Value *C1 = Builder.CreateIntrinsic(
4616 Intrinsic::arm_mve_pred_v2i,
4617 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4618 return Builder.CreateIntrinsic(
4619 Intrinsic::arm_mve_pred_i2v,
4620 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4621 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4622 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4623 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4624 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4625 Name ==
4626 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4627 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4628 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4629 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4630 Name ==
4631 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4632 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4633 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4634 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4635 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4636 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4637 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4638 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4639 std::vector<Type *> Tys;
4640 unsigned ID = CI->getIntrinsicID();
4641 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4642 switch (ID) {
4643 case Intrinsic::arm_mve_mull_int_predicated:
4644 case Intrinsic::arm_mve_vqdmull_predicated:
4645 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4646 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4647 break;
4648 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4649 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4650 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4651 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4652 V2I1Ty};
4653 break;
4654 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4655 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4656 CI->getOperand(1)->getType(), V2I1Ty};
4657 break;
4658 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4659 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4660 CI->getOperand(2)->getType(), V2I1Ty};
4661 break;
4662 case Intrinsic::arm_cde_vcx1q_predicated:
4663 case Intrinsic::arm_cde_vcx1qa_predicated:
4664 case Intrinsic::arm_cde_vcx2q_predicated:
4665 case Intrinsic::arm_cde_vcx2qa_predicated:
4666 case Intrinsic::arm_cde_vcx3q_predicated:
4667 case Intrinsic::arm_cde_vcx3qa_predicated:
4668 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4669 break;
4670 default:
4671 llvm_unreachable("Unhandled Intrinsic!");
4672 }
4673
4674 std::vector<Value *> Ops;
4675 for (Value *Op : CI->args()) {
4676 Type *Ty = Op->getType();
4677 if (Ty->getScalarSizeInBits() == 1) {
4678 Value *C1 = Builder.CreateIntrinsic(
4679 Intrinsic::arm_mve_pred_v2i,
4680 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4681 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4682 }
4683 Ops.push_back(Op);
4684 }
4685
4686 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4687 CI->getName());
4688 }
4689 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4690}
4691
4692// These are expected to have the arguments:
4693// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4694//
4695// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4696//
// Overview of the paths visible in this function:
//  1. Legacy iu8 WMMA/SWMMAC calls are re-emitted with a trailing clamp=false
//     operand.
//  2. Six f32/f16/bf16 WMMA variants are re-emitted without operands 0 and 2
//     (the A/B modifiers), which are required to be zero.
//  3. Everything else is lowered to an atomicrmw selected by name prefix;
//     nullptr is returned when the call looks malformed.
// NOTE(review): several statements of this function (including the first
// line of its signature) are not visible in this listing; comments below
// describe only what can be seen.
4698 Function *F, IRBuilder<> &Builder) {
4699 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4700 // for compatibility.
4701 auto UpgradeLegacyWMMAIUIntrinsicCall =
4702 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4703 ArrayRef<Type *> OverloadTys) -> Value * {
4704 // Prepare arguments, append clamp=0 for compatibility
4705 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4706 Args.push_back(Builder.getFalse());
4707
4708 // Insert the declaration for the right overload types
4710 F->getParent(), F->getIntrinsicID(), OverloadTys);
4711
4712 // Copy operand bundles if any
4714 CI->getOperandBundlesAsDefs(Bundles);
4715
4716 // Create the new call and copy calling properties
     // (tail-call kind, calling convention, attributes, debug location and
     // metadata are all taken from the original call).
4717 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4718 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4719 NewCall->setCallingConv(CI->getCallingConv());
4720 NewCall->setAttributes(CI->getAttributes());
4721 NewCall->setDebugLoc(CI->getDebugLoc());
4722 NewCall->copyMetadata(*CI);
4723 return NewCall;
4724 };
4725
     // Legacy 7-argument wmma iu8 form: overload types come from operands 4
     // and 1.
4726 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4727 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4728 "intrinsic should have 7 arguments");
4729 Type *T1 = CI->getArgOperand(4)->getType();
4730 Type *T2 = CI->getArgOperand(1)->getType();
4731 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4732 }
     // Legacy 8-argument swmmac iu8 form: overload types come from operands
     // 4, 1, 3 and 5, in that order.
4733 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4734 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4735 "intrinsic should have 8 arguments");
4736 Type *T1 = CI->getArgOperand(4)->getType();
4737 Type *T2 = CI->getArgOperand(1)->getType();
4738 Type *T3 = CI->getArgOperand(3)->getType();
4739 Type *T4 = CI->getArgOperand(5)->getType();
4740 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4741 }
4742
4743 switch (F->getIntrinsicID()) {
4744 default:
4745 break;
4746 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4747 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4748 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4749 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4750 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4751 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4752 // Drop src0 and src1 modifiers.
4753 const Value *Op0 = CI->getArgOperand(0);
4754 const Value *Op2 = CI->getArgOperand(2);
4755 assert(Op0->getType()->isIntegerTy() && Op2->getType()->isIntegerTy());
     // NOTE(review): dyn_cast<> yields nullptr when an operand is not a
     // ConstantInt, yet ModA/ModB are dereferenced unconditionally below.
     // Confirm these operands are guaranteed constant, or add a null check.
4756 const ConstantInt *ModA = dyn_cast<ConstantInt>(Op0);
4757 const ConstantInt *ModB = dyn_cast<ConstantInt>(Op2);
4758 if (!ModA->isZero() || !ModB->isZero())
4759 reportFatalUsageError(Name + " matrix A and B modifiers shall be zero");
4760
     // Operands from index 4 onwards are forwarded unchanged. The
     // declaration and initial contents of Args are on a line not visible
     // here.
4762 for (int I = 4, E = CI->arg_size(); I < E; ++I)
4763 Args.push_back(CI->getArgOperand(I));
4764
     // Overloads: return type and the type of the first new argument; the
     // bf16f32 variant additionally overloads on the fourth new argument.
4765 SmallVector<Type *, 3> Overloads{F->getReturnType(), Args[0]->getType()};
4766 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4767 Overloads.push_back(Args[3]->getType());
4769 F->getParent(), F->getIntrinsicID(), Overloads);
4770
4772 CI->getOperandBundlesAsDefs(Bundles);
4773
     // Re-emit the call, carrying over call properties and the original
     // value name.
4774 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4775 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4776 NewCall->setCallingConv(CI->getCallingConv());
4777 NewCall->setAttributes(CI->getAttributes());
4778 NewCall->setDebugLoc(CI->getDebugLoc());
4779 NewCall->copyMetadata(*CI);
4780 NewCall->takeName(CI);
4781 return NewCall;
4782 }
4783 }
4784
     // Atomic path: pick the atomicrmw operation from the name prefix. The
     // head of this switch expression is on a line not visible here.
4785 AtomicRMWInst::BinOp RMWOp =
4787 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4788 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4789 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4790 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4791 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4792 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4793 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4794 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4795 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4796 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4797 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4798 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4799 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4800
     // The operand count includes the callee (see the "plus the callee" remark
     // below), so fewer than 3 means fewer than two call arguments.
4801 unsigned NumOperands = CI->getNumOperands();
4802 if (NumOperands < 3) // Malformed bitcode.
4803 return nullptr;
4804
4805 Value *Ptr = CI->getArgOperand(0);
4806 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4807 if (!PtrTy) // Malformed.
4808 return nullptr;
4809
     // The RMW value must have the same type as the call's result.
4810 Value *Val = CI->getArgOperand(1);
4811 if (Val->getType() != CI->getType()) // Malformed.
4812 return nullptr;
4813
4814 ConstantInt *OrderArg = nullptr;
4815 bool IsVolatile = false;
4816
4817 // These should have 5 arguments (plus the callee). A separate version of the
4818 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4819 if (NumOperands > 3)
4820 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4821
4822 // Ignore scope argument at 3
4823
     // A volatile flag that is not a constant is conservatively treated as
     // volatile.
4824 if (NumOperands > 5) {
4825 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4826 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4827 }
4828
     // Use the requested ordering only when it is a constant holding a valid
     // AtomicOrdering value. The declaration of Order (and any further
     // adjustment of it) is on lines not visible here.
4830 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4831 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4834
4835 LLVMContext &Ctx = F->getContext();
4836
4837 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4838 Type *RetTy = CI->getType();
4839 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4840 if (VT->getElementType()->isIntegerTy(16)) {
4841 VectorType *AsBF16 =
4842 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4843 Val = Builder.CreateBitCast(Val, AsBF16);
4844 }
4845 }
4846
4847 // The scope argument never really worked correctly. Use agent as the most
4848 // conservative option which should still always produce the instruction.
4849 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4850 AtomicRMWInst *RMW =
4851 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4852
     // Outside the LOCAL address space, tag the atomic with
     // amdgpu.no.fine.grained.memory; float fadd additionally gets
     // amdgpu.ignore.denormal.mode.
4853 unsigned AddrSpace = PtrTy->getAddressSpace();
4854 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4855 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4856 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4857 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4858 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4859 }
4860
     // Flat pointers get noalias.addrspace metadata. The range itself is built
     // on lines not visible here; presumably it covers the private address
     // space, judging by the variable name (TODO confirm).
4861 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4862 MDBuilder MDB(F->getContext());
4863 MDNode *RangeNotPrivate =
4866 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4867 }
4868
4869 if (IsVolatile)
4870 RMW->setVolatile(true);
4871
     // Cast the result to the call's original return type, which undoes the
     // i16 -> bfloat reinterpretation applied to Val above when it happened.
4872 return Builder.CreateBitCast(RMW, RetTy);
4873}
4874
4875/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4876/// plain MDNode, as it's the verifier's job to check these are the correct
4877/// types later.
4878static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4879 if (Op < CI->arg_size()) {
4880 if (MetadataAsValue *MAV =
4882 Metadata *MD = MAV->getMetadata();
4883 return dyn_cast_if_present<MDNode>(MD);
4884 }
4885 }
4886 return nullptr;
4887}
4888
4889/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4890static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4891 if (Op < CI->arg_size())
4893 return MAV->getMetadata();
4894 return nullptr;
4895}
4896
4898 // The MDNode attached to this instruction might not be the correct type,
4899 // as the verifier has not yet be run. Fetch it as a bare MDNode.
4900 return I->getDebugLoc().getAsMDNode();
4901}
4902
4903/// Convert debug intrinsic calls to non-instruction debug records.
4904/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4905/// \p CI - The debug intrinsic call.
///
/// The names compared below are "label", "assign", "declare", "addr" and
/// "value"; any other name trips the assertion at the end. The new record is
/// inserted immediately before \p CI; \p CI itself is not erased here.
/// NOTE(review): the signature and the record-construction call heads are on
/// lines not visible in this listing, so only the visible arguments are
/// described.
4907 DbgRecord *DR = nullptr;
4908 if (Name == "label") {
4910 CI->getDebugLoc());
4911 } else if (Name == "assign") {
     // Operands 1, 2, 3 and 5 are unwrapped as MDNodes; operand 4 is kept as
     // raw Metadata.
4914 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4915 unwrapMAVMetadataOp(CI, 4),
4916 /*The address is a Value ref, it will be stored as a Metadata */
4917 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4918 } else if (Name == "declare") {
4921 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4922 getDebugLocSafe(CI));
4923 } else if (Name == "addr") {
4924 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4925 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4926 // Don't try to add something to the expression if it's not an expression.
4927 // Instead, allow the verifier to fail later.
     // NOTE(review): unwrapMAVOp can return nullptr, and plain dyn_cast<> does
     // not accept a null argument. dyn_cast_if_present<> may be what is
     // intended here; confirm.
4928 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4929 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4930 }
4933 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4934 getDebugLocSafe(CI));
4935 } else if (Name == "value") {
4936 // An old version of dbg.value had an extra offset argument.
4937 unsigned VarOp = 1;
4938 unsigned ExprOp = 2;
4939 if (CI->arg_size() == 4) {
     // The definition of Offset is on a line not visible here. In the
     // 4-argument form the variable and expression are operands 2 and 3.
4941 // Nonzero offset dbg.values get dropped without a replacement.
4942 if (!Offset || !Offset->isNullValue())
4943 return;
4944 VarOp = 2;
4945 ExprOp = 3;
4946 }
4949 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4950 nullptr, getDebugLocSafe(CI));
4951 }
4952 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4953 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4954}
4955
4958 if (!Offset)
4959 reportFatalUsageError("Invalid llvm.vector.splice offset argument");
4960 int64_t OffsetVal = Offset->getSExtValue();
4961 return Builder.CreateIntrinsic(OffsetVal >= 0
4962 ? Intrinsic::vector_splice_left
4963 : Intrinsic::vector_splice_right,
4964 CI->getType(),
4965 {CI->getArgOperand(0), CI->getArgOperand(1),
4966 Builder.getInt32(std::abs(OffsetVal))});
4967}
4968
4970 Function *F, IRBuilder<> &Builder) {
4971 if (Name.starts_with("to.fp16")) {
4972 Value *Cast =
4973 Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
4974 return Builder.CreateBitCast(Cast, CI->getType());
4975 }
4976
4977 if (Name.starts_with("from.fp16")) {
4978 Value *Cast =
4979 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
4980 return Builder.CreateFPExt(Cast, CI->getType());
4981 }
4982
4983 return nullptr;
4984}
4985
4986/// Upgrade a call to an old intrinsic. All argument and return casting must be
4987/// provided to seamlessly integrate with existing context.
4989 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4990 // checks the callee's function type matches. It's likely we need to handle
4991 // type changes here.
4993 if (!F)
4994 return;
4995
4996 LLVMContext &C = CI->getContext();
4997 IRBuilder<> Builder(C);
4998 if (isa<FPMathOperator>(CI))
4999 Builder.setFastMathFlags(CI->getFastMathFlags());
5000 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
5001
5002 if (!NewFn) {
5003 // Get the Function's name.
5004 StringRef Name = F->getName();
5005 if (!Name.consume_front("llvm."))
5006 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
5007
5008 bool IsX86 = Name.consume_front("x86.");
5009 bool IsNVVM = Name.consume_front("nvvm.");
5010 bool IsAArch64 = Name.consume_front("aarch64.");
5011 bool IsARM = Name.consume_front("arm.");
5012 bool IsAMDGCN = Name.consume_front("amdgcn.");
5013 bool IsDbg = Name.consume_front("dbg.");
5014 bool IsOldSplice =
5015 (Name.consume_front("experimental.vector.splice") ||
5016 Name.consume_front("vector.splice")) &&
5017 !(Name.starts_with(".left") || Name.starts_with(".right"));
5018 Value *Rep = nullptr;
5019
5020 if (!IsX86 && Name == "stackprotectorcheck") {
5021 Rep = nullptr;
5022 } else if (IsNVVM) {
5023 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
5024 } else if (IsX86) {
5025 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
5026 } else if (IsAArch64) {
5027 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
5028 } else if (IsARM) {
5029 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
5030 } else if (IsAMDGCN) {
5031 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
5032 } else if (IsDbg) {
5034 } else if (IsOldSplice) {
5035 Rep = upgradeVectorSplice(CI, Builder);
5036 } else if (Name.consume_front("convert.")) {
5037 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
5038 } else {
5039 llvm_unreachable("Unknown function for CallBase upgrade.");
5040 }
5041
5042 if (Rep)
5043 CI->replaceAllUsesWith(Rep);
5044 CI->eraseFromParent();
5045 return;
5046 }
5047
5048 const auto &DefaultCase = [&]() -> void {
5049 if (F == NewFn)
5050 return;
5051
5052 if (CI->getFunctionType() == NewFn->getFunctionType()) {
5053 // Handle generic mangling change.
5054 assert(
5055 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
5056 "Unknown function for CallBase upgrade and isn't just a name change");
5057 CI->setCalledFunction(NewFn);
5058 return;
5059 }
5060
5061 // This must be an upgrade from a named to a literal struct.
5062 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
5063 assert(OldST != NewFn->getReturnType() &&
5064 "Return type must have changed");
5065 assert(OldST->getNumElements() ==
5066 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
5067 "Must have same number of elements");
5068
5069 SmallVector<Value *> Args(CI->args());
5070 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5071 NewCI->setAttributes(CI->getAttributes());
5072 Value *Res = PoisonValue::get(OldST);
5073 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5074 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5075 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5076 }
5077 CI->replaceAllUsesWith(Res);
5078 CI->eraseFromParent();
5079 return;
5080 }
5081
5082 // We're probably about to produce something invalid. Let the verifier catch
5083 // it instead of dying here.
5084 CI->setCalledOperand(
5086 return;
5087 };
5088 CallInst *NewCall = nullptr;
5089 switch (NewFn->getIntrinsicID()) {
5090 default: {
5091 DefaultCase();
5092 return;
5093 }
5094 case Intrinsic::arm_neon_vst1:
5095 case Intrinsic::arm_neon_vst2:
5096 case Intrinsic::arm_neon_vst3:
5097 case Intrinsic::arm_neon_vst4:
5098 case Intrinsic::arm_neon_vst2lane:
5099 case Intrinsic::arm_neon_vst3lane:
5100 case Intrinsic::arm_neon_vst4lane: {
5101 SmallVector<Value *, 4> Args(CI->args());
5102 NewCall = Builder.CreateCall(NewFn, Args);
5103 break;
5104 }
5105 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5106 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5107 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5108 LLVMContext &Ctx = F->getParent()->getContext();
5109 SmallVector<Value *, 4> Args(CI->args());
5110 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
5111 cast<ConstantInt>(Args[3])->getZExtValue());
5112 NewCall = Builder.CreateCall(NewFn, Args);
5113 break;
5114 }
5115 case Intrinsic::aarch64_sve_ld3_sret:
5116 case Intrinsic::aarch64_sve_ld4_sret:
5117 case Intrinsic::aarch64_sve_ld2_sret: {
5118 // Is this a trivial remangle of the name to support ptr address spaces?
5119 if (isa<StructType>(F->getReturnType())) {
5120 DefaultCase();
5121 return;
5122 }
5123
5124 StringRef Name = F->getName();
5125 Name = Name.substr(5);
5126 unsigned N = StringSwitch<unsigned>(Name)
5127 .StartsWith("aarch64.sve.ld2", 2)
5128 .StartsWith("aarch64.sve.ld3", 3)
5129 .StartsWith("aarch64.sve.ld4", 4)
5130 .Default(0);
5131 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5132 unsigned MinElts = RetTy->getMinNumElements() / N;
5133 SmallVector<Value *, 2> Args(CI->args());
5134 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5135 Value *Ret = llvm::PoisonValue::get(RetTy);
5136 for (unsigned I = 0; I < N; I++) {
5137 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5138 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5139 }
5140 NewCall = dyn_cast<CallInst>(Ret);
5141 break;
5142 }
5143
5144 case Intrinsic::coro_end: {
5145 SmallVector<Value *, 3> Args(CI->args());
5146 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5147 NewCall = Builder.CreateCall(NewFn, Args);
5148 break;
5149 }
5150
5151 case Intrinsic::vector_extract: {
5152 StringRef Name = F->getName();
5153 Name = Name.substr(5); // Strip llvm
5154 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5155 DefaultCase();
5156 return;
5157 }
5158 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5159 unsigned MinElts = RetTy->getMinNumElements();
5160 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5161 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5162 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5163 break;
5164 }
5165
5166 case Intrinsic::vector_insert: {
5167 StringRef Name = F->getName();
5168 Name = Name.substr(5);
5169 if (!Name.starts_with("aarch64.sve.tuple")) {
5170 DefaultCase();
5171 return;
5172 }
5173 if (Name.starts_with("aarch64.sve.tuple.set")) {
5174 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5175 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5176 Value *NewIdx =
5177 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5178 NewCall = Builder.CreateCall(
5179 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5180 break;
5181 }
5182 if (Name.starts_with("aarch64.sve.tuple.create")) {
5183 unsigned N = StringSwitch<unsigned>(Name)
5184 .StartsWith("aarch64.sve.tuple.create2", 2)
5185 .StartsWith("aarch64.sve.tuple.create3", 3)
5186 .StartsWith("aarch64.sve.tuple.create4", 4)
5187 .Default(0);
5188 assert(N > 1 && "Create is expected to be between 2-4");
5189 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5190 Value *Ret = llvm::PoisonValue::get(RetTy);
5191 unsigned MinElts = RetTy->getMinNumElements() / N;
5192 for (unsigned I = 0; I < N; I++) {
5193 Value *V = CI->getArgOperand(I);
5194 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5195 }
5196 NewCall = dyn_cast<CallInst>(Ret);
5197 }
5198 break;
5199 }
5200
5201 case Intrinsic::arm_neon_bfdot:
5202 case Intrinsic::arm_neon_bfmmla:
5203 case Intrinsic::arm_neon_bfmlalb:
5204 case Intrinsic::arm_neon_bfmlalt:
5205 case Intrinsic::aarch64_neon_bfdot:
5206 case Intrinsic::aarch64_neon_bfmmla:
5207 case Intrinsic::aarch64_neon_bfmlalb:
5208 case Intrinsic::aarch64_neon_bfmlalt: {
5210 assert(CI->arg_size() == 3 &&
5211 "Mismatch between function args and call args");
5212 size_t OperandWidth =
5214 assert((OperandWidth == 64 || OperandWidth == 128) &&
5215 "Unexpected operand width");
5216 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5217 auto Iter = CI->args().begin();
5218 Args.push_back(*Iter++);
5219 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5220 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5221 NewCall = Builder.CreateCall(NewFn, Args);
5222 break;
5223 }
5224
5225 case Intrinsic::bitreverse:
5226 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5227 break;
5228
5229 case Intrinsic::ctlz:
5230 case Intrinsic::cttz: {
5231 if (CI->arg_size() != 1) {
5232 DefaultCase();
5233 return;
5234 }
5235
5236 NewCall =
5237 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5238 break;
5239 }
5240
5241 case Intrinsic::objectsize: {
5242 Value *NullIsUnknownSize =
5243 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5244 Value *Dynamic =
5245 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5246 NewCall = Builder.CreateCall(
5247 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5248 break;
5249 }
5250
5251 case Intrinsic::ctpop:
5252 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5253 break;
5254 case Intrinsic::dbg_value: {
5255 StringRef Name = F->getName();
5256 Name = Name.substr(5); // Strip llvm.
5257 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5258 if (Name.starts_with("dbg.addr")) {
5260 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5261 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5262 NewCall =
5263 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5264 MetadataAsValue::get(C, Expr)});
5265 break;
5266 }
5267
5268 // Upgrade from the old version that had an extra offset argument.
5269 assert(CI->arg_size() == 4);
5270 // Drop nonzero offsets instead of attempting to upgrade them.
5272 if (Offset->isNullValue()) {
5273 NewCall = Builder.CreateCall(
5274 NewFn,
5275 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5276 break;
5277 }
5278 CI->eraseFromParent();
5279 return;
5280 }
5281
5282 case Intrinsic::ptr_annotation:
5283 // Upgrade from versions that lacked the annotation attribute argument.
5284 if (CI->arg_size() != 4) {
5285 DefaultCase();
5286 return;
5287 }
5288
5289 // Create a new call with an added null annotation attribute argument.
5290 NewCall = Builder.CreateCall(
5291 NewFn,
5292 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5293 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5294 NewCall->takeName(CI);
5295 CI->replaceAllUsesWith(NewCall);
5296 CI->eraseFromParent();
5297 return;
5298
5299 case Intrinsic::var_annotation:
5300 // Upgrade from versions that lacked the annotation attribute argument.
5301 if (CI->arg_size() != 4) {
5302 DefaultCase();
5303 return;
5304 }
5305 // Create a new call with an added null annotation attribute argument.
5306 NewCall = Builder.CreateCall(
5307 NewFn,
5308 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5309 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5310 NewCall->takeName(CI);
5311 CI->replaceAllUsesWith(NewCall);
5312 CI->eraseFromParent();
5313 return;
5314
5315 case Intrinsic::riscv_aes32dsi:
5316 case Intrinsic::riscv_aes32dsmi:
5317 case Intrinsic::riscv_aes32esi:
5318 case Intrinsic::riscv_aes32esmi:
5319 case Intrinsic::riscv_sm4ks:
5320 case Intrinsic::riscv_sm4ed: {
5321 // The last argument to these intrinsics used to be i8 and changed to i32.
5322 // The type overload for sm4ks and sm4ed was removed.
5323 Value *Arg2 = CI->getArgOperand(2);
5324 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5325 return;
5326
5327 Value *Arg0 = CI->getArgOperand(0);
5328 Value *Arg1 = CI->getArgOperand(1);
5329 if (CI->getType()->isIntegerTy(64)) {
5330 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5331 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5332 }
5333
5334 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5335 cast<ConstantInt>(Arg2)->getZExtValue());
5336
5337 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5338 Value *Res = NewCall;
5339 if (Res->getType() != CI->getType())
5340 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5341 NewCall->takeName(CI);
5342 CI->replaceAllUsesWith(Res);
5343 CI->eraseFromParent();
5344 return;
5345 }
5346 case Intrinsic::nvvm_mapa_shared_cluster: {
5347 // Create a new call with the correct address space.
5348 NewCall =
5349 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5350 Value *Res = NewCall;
5351 Res = Builder.CreateAddrSpaceCast(
5352 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5353 NewCall->takeName(CI);
5354 CI->replaceAllUsesWith(Res);
5355 CI->eraseFromParent();
5356 return;
5357 }
5358 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5359 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5360 // Create a new call with the correct address space.
5361 SmallVector<Value *, 4> Args(CI->args());
5362 Args[0] = Builder.CreateAddrSpaceCast(
5363 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5364
5365 NewCall = Builder.CreateCall(NewFn, Args);
5366 NewCall->takeName(CI);
5367 CI->replaceAllUsesWith(NewCall);
5368 CI->eraseFromParent();
5369 return;
5370 }
5371 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5372 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5373 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5374 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5375 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5376 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5377 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5378 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5379 SmallVector<Value *, 16> Args(CI->args());
5380
5381 // Create AddrSpaceCast to shared_cluster if needed.
5382 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5383 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5385 Args[0] = Builder.CreateAddrSpaceCast(
5386 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5387
5388 // Attach the flag argument for cta_group, with a
5389 // default value of 0. This handles case (2) in
5390 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5391 size_t NumArgs = CI->arg_size();
5392 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5393 if (!FlagArg->getType()->isIntegerTy(1))
5394 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5395
5396 NewCall = Builder.CreateCall(NewFn, Args);
5397 NewCall->takeName(CI);
5398 CI->replaceAllUsesWith(NewCall);
5399 CI->eraseFromParent();
5400 return;
5401 }
5402 case Intrinsic::riscv_sha256sig0:
5403 case Intrinsic::riscv_sha256sig1:
5404 case Intrinsic::riscv_sha256sum0:
5405 case Intrinsic::riscv_sha256sum1:
5406 case Intrinsic::riscv_sm3p0:
5407 case Intrinsic::riscv_sm3p1: {
5408 // The last argument to these intrinsics used to be i8 and changed to i32.
5409 // The type overload for sm4ks and sm4ed was removed.
5410 if (!CI->getType()->isIntegerTy(64))
5411 return;
5412
5413 Value *Arg =
5414 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5415
5416 NewCall = Builder.CreateCall(NewFn, Arg);
5417 Value *Res =
5418 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5419 NewCall->takeName(CI);
5420 CI->replaceAllUsesWith(Res);
5421 CI->eraseFromParent();
5422 return;
5423 }
5424
5425 case Intrinsic::x86_xop_vfrcz_ss:
5426 case Intrinsic::x86_xop_vfrcz_sd:
5427 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5428 break;
5429
5430 case Intrinsic::x86_xop_vpermil2pd:
5431 case Intrinsic::x86_xop_vpermil2ps:
5432 case Intrinsic::x86_xop_vpermil2pd_256:
5433 case Intrinsic::x86_xop_vpermil2ps_256: {
5434 SmallVector<Value *, 4> Args(CI->args());
5435 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5436 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5437 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5438 NewCall = Builder.CreateCall(NewFn, Args);
5439 break;
5440 }
5441
5442 case Intrinsic::x86_sse41_ptestc:
5443 case Intrinsic::x86_sse41_ptestz:
5444 case Intrinsic::x86_sse41_ptestnzc: {
5445 // The arguments for these intrinsics used to be v4f32, and changed
5446 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5447 // So, the only thing required is a bitcast for both arguments.
5448 // First, check the arguments have the old type.
5449 Value *Arg0 = CI->getArgOperand(0);
5450 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5451 return;
5452
5453 // Old intrinsic, add bitcasts
5454 Value *Arg1 = CI->getArgOperand(1);
5455
5456 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5457
5458 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5459 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5460
5461 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5462 break;
5463 }
5464
5465 case Intrinsic::x86_rdtscp: {
5466 // This used to take 1 arguments. If we have no arguments, it is already
5467 // upgraded.
5468 if (CI->getNumOperands() == 0)
5469 return;
5470
5471 NewCall = Builder.CreateCall(NewFn);
5472 // Extract the second result and store it.
5473 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5474 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5475 // Replace the original call result with the first result of the new call.
5476 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5477
5478 NewCall->takeName(CI);
5479 CI->replaceAllUsesWith(TSC);
5480 CI->eraseFromParent();
5481 return;
5482 }
5483
5484 case Intrinsic::x86_sse41_insertps:
5485 case Intrinsic::x86_sse41_dppd:
5486 case Intrinsic::x86_sse41_dpps:
5487 case Intrinsic::x86_sse41_mpsadbw:
5488 case Intrinsic::x86_avx_dp_ps_256:
5489 case Intrinsic::x86_avx2_mpsadbw: {
5490 // Need to truncate the last argument from i32 to i8 -- this argument models
5491 // an inherently 8-bit immediate operand to these x86 instructions.
5492 SmallVector<Value *, 4> Args(CI->args());
5493
5494 // Replace the last argument with a trunc.
5495 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5496 NewCall = Builder.CreateCall(NewFn, Args);
5497 break;
5498 }
5499
5500 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5501 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5502 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5503 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5504 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5505 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5506 SmallVector<Value *, 4> Args(CI->args());
5507 unsigned NumElts =
5508 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5509 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5510
5511 NewCall = Builder.CreateCall(NewFn, Args);
5512 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5513
5514 NewCall->takeName(CI);
5515 CI->replaceAllUsesWith(Res);
5516 CI->eraseFromParent();
5517 return;
5518 }
5519
5520 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5521 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5522 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5523 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5524 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5525 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5526 SmallVector<Value *, 4> Args(CI->args());
5527 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5528 if (NewFn->getIntrinsicID() ==
5529 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5530 Args[1] = Builder.CreateBitCast(
5531 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5532
5533 NewCall = Builder.CreateCall(NewFn, Args);
5534 Value *Res = Builder.CreateBitCast(
5535 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5536
5537 NewCall->takeName(CI);
5538 CI->replaceAllUsesWith(Res);
5539 CI->eraseFromParent();
5540 return;
5541 }
5542 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5543 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5544 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5545 SmallVector<Value *, 4> Args(CI->args());
5546 unsigned NumElts =
5547 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5548 Args[1] = Builder.CreateBitCast(
5549 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5550 Args[2] = Builder.CreateBitCast(
5551 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5552
5553 NewCall = Builder.CreateCall(NewFn, Args);
5554 break;
5555 }
5556
5557 case Intrinsic::thread_pointer: {
5558 NewCall = Builder.CreateCall(NewFn, {});
5559 break;
5560 }
5561
5562 case Intrinsic::memcpy:
5563 case Intrinsic::memmove:
5564 case Intrinsic::memset: {
5565 // We have to make sure that the call signature is what we're expecting.
5566 // We only want to change the old signatures by removing the alignment arg:
5567 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5568 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5569 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5570 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5571 // Note: i8*'s in the above can be any pointer type
5572 if (CI->arg_size() != 5) {
5573 DefaultCase();
5574 return;
5575 }
5576 // Remove alignment argument (3), and add alignment attributes to the
5577 // dest/src pointers.
5578 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5579 CI->getArgOperand(2), CI->getArgOperand(4)};
5580 NewCall = Builder.CreateCall(NewFn, Args);
5581 AttributeList OldAttrs = CI->getAttributes();
5582 AttributeList NewAttrs = AttributeList::get(
5583 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5584 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5585 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5586 NewCall->setAttributes(NewAttrs);
5587 auto *MemCI = cast<MemIntrinsic>(NewCall);
5588 // All mem intrinsics support dest alignment.
5590 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5591 // Memcpy/Memmove also support source alignment.
5592 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5593 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5594 break;
5595 }
5596
5597 case Intrinsic::masked_load:
5598 case Intrinsic::masked_gather:
5599 case Intrinsic::masked_store:
5600 case Intrinsic::masked_scatter: {
5601 if (CI->arg_size() != 4) {
5602 DefaultCase();
5603 return;
5604 }
5605
5606 auto GetMaybeAlign = [](Value *Op) {
5607 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5608 uint64_t Val = CI->getZExtValue();
5609 if (Val == 0)
5610 return MaybeAlign();
5611 if (isPowerOf2_64(Val))
5612 return MaybeAlign(Val);
5613 }
5614 reportFatalUsageError("Invalid alignment argument");
5615 };
5616 auto GetAlign = [&](Value *Op) {
5617 MaybeAlign Align = GetMaybeAlign(Op);
5618 if (Align)
5619 return *Align;
5620 reportFatalUsageError("Invalid zero alignment argument");
5621 };
5622
5623 const DataLayout &DL = CI->getDataLayout();
5624 switch (NewFn->getIntrinsicID()) {
5625 case Intrinsic::masked_load:
5626 NewCall = Builder.CreateMaskedLoad(
5627 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5628 CI->getArgOperand(2), CI->getArgOperand(3));
5629 break;
5630 case Intrinsic::masked_gather:
5631 NewCall = Builder.CreateMaskedGather(
5632 CI->getType(), CI->getArgOperand(0),
5633 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5634 CI->getType()->getScalarType()),
5635 CI->getArgOperand(2), CI->getArgOperand(3));
5636 break;
5637 case Intrinsic::masked_store:
5638 NewCall = Builder.CreateMaskedStore(
5639 CI->getArgOperand(0), CI->getArgOperand(1),
5640 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5641 break;
5642 case Intrinsic::masked_scatter:
5643 NewCall = Builder.CreateMaskedScatter(
5644 CI->getArgOperand(0), CI->getArgOperand(1),
5645 DL.getValueOrABITypeAlignment(
5646 GetMaybeAlign(CI->getArgOperand(2)),
5647 CI->getArgOperand(0)->getType()->getScalarType()),
5648 CI->getArgOperand(3));
5649 break;
5650 default:
5651 llvm_unreachable("Unexpected intrinsic ID");
5652 }
5653 // Previous metadata is still valid.
5654 NewCall->copyMetadata(*CI);
5655 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5656 break;
5657 }
5658
5659 case Intrinsic::lifetime_start:
5660 case Intrinsic::lifetime_end: {
5661 if (CI->arg_size() != 2) {
5662 DefaultCase();
5663 return;
5664 }
5665
5666 Value *Ptr = CI->getArgOperand(1);
5667 // Try to strip pointer casts, such that the lifetime works on an alloca.
5668 Ptr = Ptr->stripPointerCasts();
5669 if (isa<AllocaInst>(Ptr)) {
5670 // Don't use NewFn, as we might have looked through an addrspacecast.
5671 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5672 NewCall = Builder.CreateLifetimeStart(Ptr);
5673 else
5674 NewCall = Builder.CreateLifetimeEnd(Ptr);
5675 break;
5676 }
5677
5678 // Otherwise remove the lifetime marker.
5679 CI->eraseFromParent();
5680 return;
5681 }
5682
5683 case Intrinsic::x86_avx512_vpdpbusd_128:
5684 case Intrinsic::x86_avx512_vpdpbusd_256:
5685 case Intrinsic::x86_avx512_vpdpbusd_512:
5686 case Intrinsic::x86_avx512_vpdpbusds_128:
5687 case Intrinsic::x86_avx512_vpdpbusds_256:
5688 case Intrinsic::x86_avx512_vpdpbusds_512:
5689 case Intrinsic::x86_avx2_vpdpbssd_128:
5690 case Intrinsic::x86_avx2_vpdpbssd_256:
5691 case Intrinsic::x86_avx10_vpdpbssd_512:
5692 case Intrinsic::x86_avx2_vpdpbssds_128:
5693 case Intrinsic::x86_avx2_vpdpbssds_256:
5694 case Intrinsic::x86_avx10_vpdpbssds_512:
5695 case Intrinsic::x86_avx2_vpdpbsud_128:
5696 case Intrinsic::x86_avx2_vpdpbsud_256:
5697 case Intrinsic::x86_avx10_vpdpbsud_512:
5698 case Intrinsic::x86_avx2_vpdpbsuds_128:
5699 case Intrinsic::x86_avx2_vpdpbsuds_256:
5700 case Intrinsic::x86_avx10_vpdpbsuds_512:
5701 case Intrinsic::x86_avx2_vpdpbuud_128:
5702 case Intrinsic::x86_avx2_vpdpbuud_256:
5703 case Intrinsic::x86_avx10_vpdpbuud_512:
5704 case Intrinsic::x86_avx2_vpdpbuuds_128:
5705 case Intrinsic::x86_avx2_vpdpbuuds_256:
5706 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5707 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5708 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5709 CI->getArgOperand(2)};
5710 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5711 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5712 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5713
5714 NewCall = Builder.CreateCall(NewFn, Args);
5715 break;
5716 }
5717 case Intrinsic::x86_avx512_vpdpwssd_128:
5718 case Intrinsic::x86_avx512_vpdpwssd_256:
5719 case Intrinsic::x86_avx512_vpdpwssd_512:
5720 case Intrinsic::x86_avx512_vpdpwssds_128:
5721 case Intrinsic::x86_avx512_vpdpwssds_256:
5722 case Intrinsic::x86_avx512_vpdpwssds_512:
5723 case Intrinsic::x86_avx2_vpdpwsud_128:
5724 case Intrinsic::x86_avx2_vpdpwsud_256:
5725 case Intrinsic::x86_avx10_vpdpwsud_512:
5726 case Intrinsic::x86_avx2_vpdpwsuds_128:
5727 case Intrinsic::x86_avx2_vpdpwsuds_256:
5728 case Intrinsic::x86_avx10_vpdpwsuds_512:
5729 case Intrinsic::x86_avx2_vpdpwusd_128:
5730 case Intrinsic::x86_avx2_vpdpwusd_256:
5731 case Intrinsic::x86_avx10_vpdpwusd_512:
5732 case Intrinsic::x86_avx2_vpdpwusds_128:
5733 case Intrinsic::x86_avx2_vpdpwusds_256:
5734 case Intrinsic::x86_avx10_vpdpwusds_512:
5735 case Intrinsic::x86_avx2_vpdpwuud_128:
5736 case Intrinsic::x86_avx2_vpdpwuud_256:
5737 case Intrinsic::x86_avx10_vpdpwuud_512:
5738 case Intrinsic::x86_avx2_vpdpwuuds_128:
5739 case Intrinsic::x86_avx2_vpdpwuuds_256:
5740 case Intrinsic::x86_avx10_vpdpwuuds_512:
5741 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5742 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5743 CI->getArgOperand(2)};
5744 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5745 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5746 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5747
5748 NewCall = Builder.CreateCall(NewFn, Args);
5749 break;
5750 }
5751 assert(NewCall && "Should have either set this variable or returned through "
5752 "the default case");
5753 NewCall->takeName(CI);
5754 CI->replaceAllUsesWith(NewCall);
5755 CI->eraseFromParent();
5756}
5757
5759 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5760
5761 // Check if this function should be upgraded and get the replacement function
5762 // if there is one.
5763 Function *NewFn;
5764 if (UpgradeIntrinsicFunction(F, NewFn)) {
5765 // Replace all users of the old function with the new function or new
5766 // instructions. This is not a range loop because the call is deleted.
5767 for (User *U : make_early_inc_range(F->users()))
5768 if (CallBase *CB = dyn_cast<CallBase>(U))
5769 UpgradeIntrinsicCall(CB, NewFn);
5770
5771 // Remove old function, no longer used, from the module.
5772 if (F != NewFn)
5773 F->eraseFromParent();
5774 }
5775}
5776
5778 const unsigned NumOperands = MD.getNumOperands();
5779 if (NumOperands == 0)
5780 return &MD; // Invalid, punt to a verifier error.
5781
5782 // Check if the tag uses struct-path aware TBAA format.
5783 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5784 return &MD;
5785
5786 auto &Context = MD.getContext();
5787 if (NumOperands == 3) {
5788 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5789 MDNode *ScalarType = MDNode::get(Context, Elts);
5790 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5791 Metadata *Elts2[] = {ScalarType, ScalarType,
5794 MD.getOperand(2)};
5795 return MDNode::get(Context, Elts2);
5796 }
5797 // Create a MDNode <MD, MD, offset 0>
5799 Type::getInt64Ty(Context)))};
5800 return MDNode::get(Context, Elts);
5801}
5802
5804 Instruction *&Temp) {
5805 if (Opc != Instruction::BitCast)
5806 return nullptr;
5807
5808 Temp = nullptr;
5809 Type *SrcTy = V->getType();
5810 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5811 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5812 LLVMContext &Context = V->getContext();
5813
5814 // We have no information about target data layout, so we assume that
5815 // the maximum pointer size is 64bit.
5816 Type *MidTy = Type::getInt64Ty(Context);
5817 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5818
5819 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5820 }
5821
5822 return nullptr;
5823}
5824
5826 if (Opc != Instruction::BitCast)
5827 return nullptr;
5828
5829 Type *SrcTy = C->getType();
5830 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5831 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5832 LLVMContext &Context = C->getContext();
5833
5834 // We have no information about target data layout, so we assume that
5835 // the maximum pointer size is 64bit.
5836 Type *MidTy = Type::getInt64Ty(Context);
5837
5839 DestTy);
5840 }
5841
5842 return nullptr;
5843}
5844
5845/// Check the debug info version number, if it is out-dated, drop the debug
5846/// info. Return true if module is modified.
5849 return false;
5850
5851 llvm::TimeTraceScope timeScope("Upgrade debug info");
5852 // We need to get metadata before the module is verified (i.e., getModuleFlag
5853 // makes assumptions that we haven't verified yet). Carefully extract the flag
5854 // from the metadata.
5855 unsigned Version = 0;
5856 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5857 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5858 if (Flag->getNumOperands() < 3)
5859 return false;
5860 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5861 return K->getString() == "Debug Info Version";
5862 return false;
5863 });
5864 if (OpIt != ModFlags->op_end()) {
5865 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5866 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5867 Version = CI->getZExtValue();
5868 }
5869 }
5870
5872 bool BrokenDebugInfo = false;
5873 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5874 report_fatal_error("Broken module found, compilation aborted!");
5875 if (!BrokenDebugInfo)
5876 // Everything is ok.
5877 return false;
5878 else {
5879 // Diagnose malformed debug info.
5881 M.getContext().diagnose(Diag);
5882 }
5883 }
5884 bool Modified = StripDebugInfo(M);
5886 // Diagnose a version mismatch.
5888 M.getContext().diagnose(DiagVersion);
5889 }
5890 return Modified;
5891}
5892
5893static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5894 GlobalValue *GV, const Metadata *V) {
5895 Function *F = cast<Function>(GV);
5896
5897 constexpr StringLiteral DefaultValue = "1";
5898 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5899 unsigned Length = 0;
5900
5901 if (F->hasFnAttribute(Attr)) {
5902 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5903 // parse these elements placing them into Vect3
5904 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5905 for (; Length < 3 && !S.empty(); Length++) {
5906 auto [Part, Rest] = S.split(',');
5907 Vect3[Length] = Part.trim();
5908 S = Rest;
5909 }
5910 }
5911
5912 const unsigned Dim = DimC - 'x';
5913 assert(Dim < 3 && "Unexpected dim char");
5914
5915 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5916
5917 // local variable required for StringRef in Vect3 to point to.
5918 const std::string VStr = llvm::utostr(VInt);
5919 Vect3[Dim] = VStr;
5920 Length = std::max(Length, Dim + 1);
5921
5922 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5923 F->addFnAttr(Attr, NewAttr);
5924}
5925
5926static inline bool isXYZ(StringRef S) {
5927 return S == "x" || S == "y" || S == "z";
5928}
5929
5931 const Metadata *V) {
5932 if (K == "kernel") {
5934 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5935 return true;
5936 }
5937 if (K == "align") {
5938 // V is a bitfield specifying two 16-bit values. The alignment value is
5939 // specified in the low 16 bits; the index is specified in the high bits. For the
5940 // index, 0 indicates the return value while higher values correspond to
5941 // each parameter (idx = param + 1).
5942 const uint64_t AlignIdxValuePair =
5943 mdconst::extract<ConstantInt>(V)->getZExtValue();
5944 const unsigned Idx = (AlignIdxValuePair >> 16);
5945 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5946 cast<Function>(GV)->addAttributeAtIndex(
5947 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5948 return true;
5949 }
5950 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5951 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5953 return true;
5954 }
5955 if (K == "minctasm") {
5956 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5957 cast<Function>(GV)->addFnAttr(NVVMAttr::MinCTASm, llvm::utostr(CV));
5958 return true;
5959 }
5960 if (K == "maxnreg") {
5961 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5962 cast<Function>(GV)->addFnAttr(NVVMAttr::MaxNReg, llvm::utostr(CV));
5963 return true;
5964 }
5965 if (K.consume_front("maxntid") && isXYZ(K)) {
5967 return true;
5968 }
5969 if (K.consume_front("reqntid") && isXYZ(K)) {
5971 return true;
5972 }
5973 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5975 return true;
5976 }
5977 if (K == "grid_constant") {
5978 const auto Attr = Attribute::get(GV->getContext(), NVVMAttr::GridConstant);
5979 for (const auto &Op : cast<MDNode>(V)->operands()) {
5980 // For some reason, the index is 1-based in the metadata. Good thing we're
5981 // able to auto-upgrade it!
5982 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5983 cast<Function>(GV)->addParamAttr(Index, Attr);
5984 }
5985 return true;
5986 }
5987
5988 return false;
5989}
5990
5992 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5993 if (!NamedMD)
5994 return;
5995
5996 SmallVector<MDNode *, 8> NewNodes;
5998 for (MDNode *MD : NamedMD->operands()) {
5999 if (!SeenNodes.insert(MD).second)
6000 continue;
6001
6002 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
6003 if (!GV)
6004 continue;
6005
6006 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
6007
6008 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
6009 // Each nvvm.annotations metadata entry will be of the following form:
6010 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
6011 // start index = 1, to skip the global variable key
6012 // increment = 2, to skip the value for each property-value pairs
6013 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6014 MDString *K = cast<MDString>(MD->getOperand(j));
6015 const MDOperand &V = MD->getOperand(j + 1);
6016 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
6017 if (!Upgraded)
6018 NewOperands.append({K, V});
6019 }
6020
6021 if (NewOperands.size() > 1)
6022 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
6023 }
6024
6025 NamedMD->clearOperands();
6026 for (MDNode *N : NewNodes)
6027 NamedMD->addOperand(N);
6028}
6029
6030/// This checks for objc retain release marker which should be upgraded. It
6031/// returns true if module is modified.
6033 bool Changed = false;
6034 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
6035 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6036 if (ModRetainReleaseMarker) {
6037 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
6038 if (Op) {
6039 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
6040 if (ID) {
6041 SmallVector<StringRef, 4> ValueComp;
6042 ID->getString().split(ValueComp, "#");
6043 if (ValueComp.size() == 2) {
6044 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
6045 ID = MDString::get(M.getContext(), NewValue);
6046 }
6047 M.addModuleFlag(Module::Error, MarkerKey, ID);
6048 M.eraseNamedMetadata(ModRetainReleaseMarker);
6049 Changed = true;
6050 }
6051 }
6052 }
6053 return Changed;
6054}
6055
6057 // This lambda converts normal function calls to ARC runtime functions to
6058 // intrinsic calls.
6059 auto UpgradeToIntrinsic = [&](const char *OldFunc,
6060 llvm::Intrinsic::ID IntrinsicFunc) {
6061 Function *Fn = M.getFunction(OldFunc);
6062
6063 if (!Fn)
6064 return;
6065
6066 Function *NewFn =
6067 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
6068
6069 for (User *U : make_early_inc_range(Fn->users())) {
6071 if (!CI || CI->getCalledFunction() != Fn)
6072 continue;
6073
6074 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
6075 FunctionType *NewFuncTy = NewFn->getFunctionType();
6077
6078 // Don't upgrade the intrinsic if it's not valid to bitcast the return
6079 // value to the return type of the old function.
6080 if (NewFuncTy->getReturnType() != CI->getType() &&
6081 !CastInst::castIsValid(Instruction::BitCast, CI,
6082 NewFuncTy->getReturnType()))
6083 continue;
6084
6085 bool InvalidCast = false;
6086
6087 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
6088 Value *Arg = CI->getArgOperand(I);
6089
6090 // Bitcast argument to the parameter type of the new function if it's
6091 // not a variadic argument.
6092 if (I < NewFuncTy->getNumParams()) {
6093 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
6094 // to the parameter type of the new function.
6095 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
6096 NewFuncTy->getParamType(I))) {
6097 InvalidCast = true;
6098 break;
6099 }
6100 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
6101 }
6102 Args.push_back(Arg);
6103 }
6104
6105 if (InvalidCast)
6106 continue;
6107
6108 // Create a call instruction that calls the new function.
6109 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6110 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
6111 NewCall->takeName(CI);
6112
6113 // Bitcast the return value back to the type of the old call.
6114 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
6115
6116 if (!CI->use_empty())
6117 CI->replaceAllUsesWith(NewRetVal);
6118 CI->eraseFromParent();
6119 }
6120
6121 if (Fn->use_empty())
6122 Fn->eraseFromParent();
6123 };
6124
6125 // Unconditionally convert a call to "clang.arc.use" to a call to
6126 // "llvm.objc.clang.arc.use".
6127 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6128
6129 // Upgrade the retain release marker. If there is no need to upgrade
6130 // the marker, that means either the module is already new enough to contain
6131 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
6133 return;
6134
6135 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6136 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6137 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6138 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6139 {"objc_autoreleaseReturnValue",
6140 llvm::Intrinsic::objc_autoreleaseReturnValue},
6141 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6142 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6143 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6144 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6145 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6146 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6147 {"objc_release", llvm::Intrinsic::objc_release},
6148 {"objc_retain", llvm::Intrinsic::objc_retain},
6149 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6150 {"objc_retainAutoreleaseReturnValue",
6151 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6152 {"objc_retainAutoreleasedReturnValue",
6153 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6154 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6155 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6156 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6157 {"objc_unsafeClaimAutoreleasedReturnValue",
6158 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6159 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6160 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6161 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6162 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6163 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6164 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6165 {"objc_arc_annotation_topdown_bbstart",
6166 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6167 {"objc_arc_annotation_topdown_bbend",
6168 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6169 {"objc_arc_annotation_bottomup_bbstart",
6170 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6171 {"objc_arc_annotation_bottomup_bbend",
6172 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6173
6174 for (auto &I : RuntimeFuncs)
6175 UpgradeToIntrinsic(I.first, I.second);
6176}
6177
6179 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6180 if (!ModFlags)
6181 return false;
6182
6183 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6184 bool HasSwiftVersionFlag = false;
6185 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6186 uint32_t SwiftABIVersion;
6187 auto Int8Ty = Type::getInt8Ty(M.getContext());
6188 auto Int32Ty = Type::getInt32Ty(M.getContext());
6189
6190 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6191 MDNode *Op = ModFlags->getOperand(I);
6192 if (Op->getNumOperands() != 3)
6193 continue;
6194 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6195 if (!ID)
6196 continue;
6197 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6198 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6199 Type::getInt32Ty(M.getContext()), B)),
6200 MDString::get(M.getContext(), ID->getString()),
6201 Op->getOperand(2)};
6202 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6203 Changed = true;
6204 };
6205
6206 if (ID->getString() == "Objective-C Image Info Version")
6207 HasObjCFlag = true;
6208 if (ID->getString() == "Objective-C Class Properties")
6209 HasClassProperties = true;
6210 // Upgrade PIC from Error/Max to Min.
6211 if (ID->getString() == "PIC Level") {
6212 if (auto *Behavior =
6214 uint64_t V = Behavior->getLimitedValue();
6215 if (V == Module::Error || V == Module::Max)
6216 SetBehavior(Module::Min);
6217 }
6218 }
6219 // Upgrade "PIE Level" from Error to Max.
6220 if (ID->getString() == "PIE Level")
6221 if (auto *Behavior =
6223 if (Behavior->getLimitedValue() == Module::Error)
6224 SetBehavior(Module::Max);
6225
6226 // Upgrade branch protection and return address signing module flags. The
6227 // module flag behavior for these fields were Error and now they are Min.
6228 if (ID->getString() == "branch-target-enforcement" ||
6229 ID->getString().starts_with("sign-return-address")) {
6230 if (auto *Behavior =
6232 if (Behavior->getLimitedValue() == Module::Error) {
6233 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6234 Metadata *Ops[3] = {
6235 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6236 Op->getOperand(1), Op->getOperand(2)};
6237 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6238 Changed = true;
6239 }
6240 }
6241 }
6242
6243 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
6244 // section name so that llvm-lto will not complain about mismatching
6245 // module flags that are functionally the same.
6246 if (ID->getString() == "Objective-C Image Info Section") {
6247 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6248 SmallVector<StringRef, 4> ValueComp;
6249 Value->getString().split(ValueComp, " ");
6250 if (ValueComp.size() != 1) {
6251 std::string NewValue;
6252 for (auto &S : ValueComp)
6253 NewValue += S.str();
6254 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6255 MDString::get(M.getContext(), NewValue)};
6256 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6257 Changed = true;
6258 }
6259 }
6260 }
6261
6262 // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
6263 // If the higher bits are set, it adds new module flag for swift info.
6264 if (ID->getString() == "Objective-C Garbage Collection") {
6265 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6266 if (Md) {
6267 assert(Md->getValue() && "Expected non-empty metadata");
6268 auto Type = Md->getValue()->getType();
6269 if (Type == Int8Ty)
6270 continue;
6271 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6272 if ((Val & 0xff) != Val) {
6273 HasSwiftVersionFlag = true;
6274 SwiftABIVersion = (Val & 0xff00) >> 8;
6275 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6276 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6277 }
6278 Metadata *Ops[3] = {
6280 Op->getOperand(1),
6281 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
6282 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6283 Changed = true;
6284 }
6285 }
6286
6287 if (ID->getString() == "amdgpu_code_object_version") {
6288 Metadata *Ops[3] = {
6289 Op->getOperand(0),
6290 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6291 Op->getOperand(2)};
6292 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6293 Changed = true;
6294 }
6295 }
6296
6297 // "Objective-C Class Properties" is recently added for Objective-C. We
6298 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
6299 // flag of value 0, so we can correctly downgrade this flag when trying to
6300 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6301 // this module flag.
6302 if (HasObjCFlag && !HasClassProperties) {
6303 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6304 (uint32_t)0);
6305 Changed = true;
6306 }
6307
6308 if (HasSwiftVersionFlag) {
6309 M.addModuleFlag(Module::Error, "Swift ABI Version",
6310 SwiftABIVersion);
6311 M.addModuleFlag(Module::Error, "Swift Major Version",
6312 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6313 M.addModuleFlag(Module::Error, "Swift Minor Version",
6314 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6315 Changed = true;
6316 }
6317
6318 return Changed;
6319}
6320
6322 auto TrimSpaces = [](StringRef Section) -> std::string {
6323 SmallVector<StringRef, 5> Components;
6324 Section.split(Components, ',');
6325
6326 SmallString<32> Buffer;
6327 raw_svector_ostream OS(Buffer);
6328
6329 for (auto Component : Components)
6330 OS << ',' << Component.trim();
6331
6332 return std::string(OS.str().substr(1));
6333 };
6334
6335 for (auto &GV : M.globals()) {
6336 if (!GV.hasSection())
6337 continue;
6338
6339 StringRef Section = GV.getSection();
6340
6341 if (!Section.starts_with("__DATA, __objc_catlist"))
6342 continue;
6343
6344 // __DATA, __objc_catlist, regular, no_dead_strip
6345 // __DATA,__objc_catlist,regular,no_dead_strip
6346 GV.setSection(TrimSpaces(Section));
6347 }
6348}
6349
6350namespace {
6351// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6352// callsites within a function that did not also have the strictfp attribute.
6353// Since 10.0, if strict FP semantics are needed within a function, the
6354// function must have the strictfp attribute and all calls within the function
6355// must also have the strictfp attribute. This latter restriction is
6356// necessary to prevent unwanted libcall simplification when a function is
6357// being cloned (such as for inlining).
6358//
6359// The "dangling" strictfp attribute usage was only used to prevent constant
6360// folding and other libcall simplification. The nobuiltin attribute on the
6361// callsite has the same effect.
6362struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6363 StrictFPUpgradeVisitor() = default;
6364
6365 void visitCallBase(CallBase &Call) {
6366 if (!Call.isStrictFP())
6367 return;
6369 return;
6370 // If we get here, the caller doesn't have the strictfp attribute
6371 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6372 Call.removeFnAttr(Attribute::StrictFP);
6373 Call.addFnAttr(Attribute::NoBuiltin);
6374 }
6375};
6376
6377/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6378struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6379 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6380 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6381
6382 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6383 if (!RMW.isFloatingPointOperation())
6384 return;
6385
6386 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6387 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6388 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6389 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6390 }
6391};
6392} // namespace
6393
6395 // If a function definition doesn't have the strictfp attribute,
6396 // convert any callsite strictfp attributes to nobuiltin.
6397 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6398 StrictFPUpgradeVisitor SFPV;
6399 SFPV.visit(F);
6400 }
6401
6402 // Remove all incompatible attributes from function.
6403 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6404 F.getReturnType(), F.getAttributes().getRetAttrs()));
6405 for (auto &Arg : F.args())
6406 Arg.removeAttrs(
6407 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6408
6409 bool AddingAttrs = false, RemovingAttrs = false;
6410 AttrBuilder AttrsToAdd(F.getContext());
6411 AttributeMask AttrsToRemove;
6412
6413 // Older versions of LLVM treated an "implicit-section-name" attribute
6414 // similarly to directly setting the section on a Function.
6415 if (Attribute A = F.getFnAttribute("implicit-section-name");
6416 A.isValid() && A.isStringAttribute()) {
6417 F.setSection(A.getValueAsString());
6418 AttrsToRemove.addAttribute("implicit-section-name");
6419 RemovingAttrs = true;
6420 }
6421
6422 if (Attribute A = F.getFnAttribute("nooutline");
6423 A.isValid() && A.isStringAttribute()) {
6424 AttrsToRemove.addAttribute("nooutline");
6425 AttrsToAdd.addAttribute(Attribute::NoOutline);
6426 AddingAttrs = RemovingAttrs = true;
6427 }
6428
6429 if (Attribute A = F.getFnAttribute("uniform-work-group-size");
6430 A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
6431 AttrsToRemove.addAttribute("uniform-work-group-size");
6432 RemovingAttrs = true;
6433 if (A.getValueAsString() == "true") {
6434 AttrsToAdd.addAttribute("uniform-work-group-size");
6435 AddingAttrs = true;
6436 }
6437 }
6438
6439 if (!F.empty()) {
6440 // For some reason this is called twice, and the first time is before any
6441 // instructions are loaded into the body.
6442
6443 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6444 A.isValid()) {
6445
6446 if (A.getValueAsBool()) {
6447 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6448 Visitor.visit(F);
6449 }
6450
6451 // We will leave behind dead attribute uses on external declarations, but
6452 // clang never added these to declarations anyway.
6453 AttrsToRemove.addAttribute("amdgpu-unsafe-fp-atomics");
6454 RemovingAttrs = true;
6455 }
6456 }
6457
6458 DenormalMode DenormalFPMath = DenormalMode::getIEEE();
6459 DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();
6460
6461 bool HandleDenormalMode = false;
6462
6463 if (Attribute Attr = F.getFnAttribute("denormal-fp-math"); Attr.isValid()) {
6464 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6465 if (ParsedMode.isValid()) {
6466 DenormalFPMath = ParsedMode;
6467 AttrsToRemove.addAttribute("denormal-fp-math");
6468 AddingAttrs = RemovingAttrs = true;
6469 HandleDenormalMode = true;
6470 }
6471 }
6472
6473 if (Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
6474 Attr.isValid()) {
6475 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6476 if (ParsedMode.isValid()) {
6477 DenormalFPMathF32 = ParsedMode;
6478 AttrsToRemove.addAttribute("denormal-fp-math-f32");
6479 AddingAttrs = RemovingAttrs = true;
6480 HandleDenormalMode = true;
6481 }
6482 }
6483
6484 if (HandleDenormalMode)
6485 AttrsToAdd.addDenormalFPEnvAttr(
6486 DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));
6487
6488 if (RemovingAttrs)
6489 F.removeFnAttrs(AttrsToRemove);
6490
6491 if (AddingAttrs)
6492 F.addFnAttrs(AttrsToAdd);
6493}
6494
6495// Check if the function attribute is not present and set it.
6497 StringRef Value) {
6498 if (!F.hasFnAttribute(FnAttrName))
6499 F.addFnAttr(FnAttrName, Value);
6500}
6501
6502// Check if the function attribute is not present and set it if needed.
6503// If the attribute is "false" then removes it.
6504// If the attribute is "true" resets it to a valueless attribute.
6505static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6506 if (!F.hasFnAttribute(FnAttrName)) {
6507 if (Set)
6508 F.addFnAttr(FnAttrName);
6509 } else {
6510 auto A = F.getFnAttribute(FnAttrName);
6511 if ("false" == A.getValueAsString())
6512 F.removeFnAttr(FnAttrName);
6513 else if ("true" == A.getValueAsString()) {
6514 F.removeFnAttr(FnAttrName);
6515 F.addFnAttr(FnAttrName);
6516 }
6517 }
6518}
6519
6521 Triple T(M.getTargetTriple());
6522 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6523 return;
6524
6525 uint64_t BTEValue = 0;
6526 uint64_t BPPLRValue = 0;
6527 uint64_t GCSValue = 0;
6528 uint64_t SRAValue = 0;
6529 uint64_t SRAALLValue = 0;
6530 uint64_t SRABKeyValue = 0;
6531
6532 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6533 if (ModFlags) {
6534 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6535 MDNode *Op = ModFlags->getOperand(I);
6536 if (Op->getNumOperands() != 3)
6537 continue;
6538
6539 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6540 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6541 if (!ID || !CI)
6542 continue;
6543
6544 StringRef IDStr = ID->getString();
6545 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6546 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6547 : IDStr == "guarded-control-stack" ? &GCSValue
6548 : IDStr == "sign-return-address" ? &SRAValue
6549 : IDStr == "sign-return-address-all" ? &SRAALLValue
6550 : IDStr == "sign-return-address-with-bkey"
6551 ? &SRABKeyValue
6552 : nullptr;
6553 if (!ValPtr)
6554 continue;
6555
6556 *ValPtr = CI->getZExtValue();
6557 if (*ValPtr == 2)
6558 return;
6559 }
6560 }
6561
6562 bool BTE = BTEValue == 1;
6563 bool BPPLR = BPPLRValue == 1;
6564 bool GCS = GCSValue == 1;
6565 bool SRA = SRAValue == 1;
6566
6567 StringRef SignTypeValue = "non-leaf";
6568 if (SRA && SRAALLValue == 1)
6569 SignTypeValue = "all";
6570
6571 StringRef SignKeyValue = "a_key";
6572 if (SRA && SRABKeyValue == 1)
6573 SignKeyValue = "b_key";
6574
6575 for (Function &F : M.getFunctionList()) {
6576 if (F.isDeclaration())
6577 continue;
6578
6579 if (SRA) {
6580 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6581 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6582 } else {
6583 if (auto A = F.getFnAttribute("sign-return-address");
6584 A.isValid() && "none" == A.getValueAsString()) {
6585 F.removeFnAttr("sign-return-address");
6586 F.removeFnAttr("sign-return-address-key");
6587 }
6588 }
6589 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6590 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6591 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6592 }
6593
6594 if (BTE)
6595 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6596 if (BPPLR)
6597 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6598 if (GCS)
6599 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6600 if (SRA) {
6601 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6602 if (SRAALLValue == 1)
6603 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6604 if (SRABKeyValue == 1)
6605 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6606 }
6607}
6608
6609static bool isOldLoopArgument(Metadata *MD) {
6610 auto *T = dyn_cast_or_null<MDTuple>(MD);
6611 if (!T)
6612 return false;
6613 if (T->getNumOperands() < 1)
6614 return false;
6615 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6616 if (!S)
6617 return false;
6618 return S->getString().starts_with("llvm.vectorizer.");
6619}
6620
6622 StringRef OldPrefix = "llvm.vectorizer.";
6623 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6624
6625 if (OldTag == "llvm.vectorizer.unroll")
6626 return MDString::get(C, "llvm.loop.interleave.count");
6627
6628 return MDString::get(
6629 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6630 .str());
6631}
6632
6634 auto *T = dyn_cast_or_null<MDTuple>(MD);
6635 if (!T)
6636 return MD;
6637 if (T->getNumOperands() < 1)
6638 return MD;
6639 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6640 if (!OldTag)
6641 return MD;
6642 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6643 return MD;
6644
6645 // This has an old tag. Upgrade it.
6647 Ops.reserve(T->getNumOperands());
6648 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6649 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6650 Ops.push_back(T->getOperand(I));
6651
6652 return MDTuple::get(T->getContext(), Ops);
6653}
6654
6656 auto *T = dyn_cast<MDTuple>(&N);
6657 if (!T)
6658 return &N;
6659
6660 if (none_of(T->operands(), isOldLoopArgument))
6661 return &N;
6662
6664 Ops.reserve(T->getNumOperands());
6665 for (Metadata *MD : T->operands())
6666 Ops.push_back(upgradeLoopArgument(MD));
6667
6668 return MDTuple::get(T->getContext(), Ops);
6669}
6670
6672 Triple T(TT);
6673 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6674 // the address space of globals to 1. This does not apply to SPIRV Logical.
6675 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6676 !DL.contains("-G") && !DL.starts_with("G")) {
6677 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6678 }
6679
6680 if (T.isLoongArch64() || T.isRISCV64()) {
6681 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6682 auto I = DL.find("-n64-");
6683 if (I != StringRef::npos)
6684 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6685 return DL.str();
6686 }
6687
6688 // AMDGPU data layout upgrades.
6689 std::string Res = DL.str();
6690 if (T.isAMDGPU()) {
6691 // Define address spaces for constants.
6692 if (!DL.contains("-G") && !DL.starts_with("G"))
6693 Res.append(Res.empty() ? "G1" : "-G1");
6694
6695 // AMDGCN data layout upgrades.
6696 if (T.isAMDGCN()) {
6697
6698 // Add missing non-integral declarations.
6699 // This goes before adding new address spaces to prevent incoherent string
6700 // values.
6701 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6702 Res.append("-ni:7:8:9");
6703 // Update ni:7 to ni:7:8:9.
6704 if (DL.ends_with("ni:7"))
6705 Res.append(":8:9");
6706 if (DL.ends_with("ni:7:8"))
6707 Res.append(":9");
6708
6709 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6710 // resources) An empty data layout has already been upgraded to G1 by now.
6711 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6712 Res.append("-p7:160:256:256:32");
6713 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6714 Res.append("-p8:128:128:128:48");
6715 constexpr StringRef OldP8("-p8:128:128-");
6716 if (DL.contains(OldP8))
6717 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6718 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6719 Res.append("-p9:192:256:256:32");
6720 }
6721
6722 // Upgrade the ELF mangling mode.
6723 if (!DL.contains("m:e"))
6724 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6725
6726 return Res;
6727 }
6728
6729 if (T.isSystemZ() && !DL.empty()) {
6730 // Make sure the stack alignment is present.
6731 if (!DL.contains("-S64"))
6732 return "E-S64" + DL.drop_front(1).str();
6733 return DL.str();
6734 }
6735
6736 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6737 // If the datalayout matches the expected format, add pointer size address
6738 // spaces to the datalayout.
6739 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6740 if (!DL.contains(AddrSpaces)) {
6742 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6743 if (R.match(Res, &Groups))
6744 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6745 }
6746 };
6747
6748 // AArch64 data layout upgrades.
6749 if (T.isAArch64()) {
6750 // Add "-Fn32"
6751 if (!DL.empty() && !DL.contains("-Fn32"))
6752 Res.append("-Fn32");
6753 AddPtr32Ptr64AddrSpaces();
6754 return Res;
6755 }
6756
6757 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6758 T.isWasm()) {
6759 // Mips64 with o32 ABI did not add "-i128:128".
6760 // Add "-i128:128"
6761 std::string I64 = "-i64:64";
6762 std::string I128 = "-i128:128";
6763 if (!StringRef(Res).contains(I128)) {
6764 size_t Pos = Res.find(I64);
6765 if (Pos != size_t(-1))
6766 Res.insert(Pos + I64.size(), I128);
6767 }
6768 }
6769
6770 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6771 size_t Pos = Res.find("-S128");
6772 if (Pos == StringRef::npos)
6773 Pos = Res.size();
6774 Res.insert(Pos, "-f64:32:64");
6775 }
6776
6777 if (!T.isX86())
6778 return Res;
6779
6780 AddPtr32Ptr64AddrSpaces();
6781
6782 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6783 // for i128 operations prior to this being reflected in the data layout, and
6784 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6785 // boundaries, so although this is a breaking change, the upgrade is expected
6786 // to fix more IR than it breaks.
6787 // Intel MCU is an exception and uses 4-byte-alignment.
6788 if (!T.isOSIAMCU()) {
6789 std::string I128 = "-i128:128";
6790 if (StringRef Ref = Res; !Ref.contains(I128)) {
6792 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6793 if (R.match(Res, &Groups))
6794 Res = (Groups[1] + I128 + Groups[3]).str();
6795 }
6796 }
6797
6798 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6799 // Raising the alignment is safe because Clang did not produce f80 values in
6800 // the MSVC environment before this upgrade was added.
6801 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6802 StringRef Ref = Res;
6803 auto I = Ref.find("-f80:32-");
6804 if (I != StringRef::npos)
6805 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6806 }
6807
6808 return Res;
6809}
6810
6811void llvm::UpgradeAttributes(AttrBuilder &B) {
6812 StringRef FramePointer;
6813 Attribute A = B.getAttribute("no-frame-pointer-elim");
6814 if (A.isValid()) {
6815 // The value can be "true" or "false".
6816 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6817 B.removeAttribute("no-frame-pointer-elim");
6818 }
6819 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6820 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6821 if (FramePointer != "all")
6822 FramePointer = "non-leaf";
6823 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6824 }
6825 if (!FramePointer.empty())
6826 B.addAttribute("frame-pointer", FramePointer);
6827
6828 A = B.getAttribute("null-pointer-is-valid");
6829 if (A.isValid()) {
6830 // The value can be "true" or "false".
6831 bool NullPointerIsValid = A.getValueAsString() == "true";
6832 B.removeAttribute("null-pointer-is-valid");
6833 if (NullPointerIsValid)
6834 B.addAttribute(Attribute::NullPointerIsValid);
6835 }
6836
6837 A = B.getAttribute("uniform-work-group-size");
6838 if (A.isValid()) {
6839 StringRef Val = A.getValueAsString();
6840 if (!Val.empty()) {
6841 bool IsTrue = Val == "true";
6842 B.removeAttribute("uniform-work-group-size");
6843 if (IsTrue)
6844 B.addAttribute("uniform-work-group-size");
6845 }
6846 }
6847}
6848
6849void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6850 // clang.arc.attachedcall bundles are now required to have an operand.
6851 // If they don't, it's okay to drop them entirely: when there is an operand,
6852 // the "attachedcall" is meaningful and required, but without an operand,
6853 // it's just a marker NOP. Dropping it merely prevents an optimization.
6854 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6855 return OBD.getTag() == "clang.arc.attachedcall" &&
6856 OBD.inputs().empty();
6857 });
6858}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define R2(n)
This file contains the declarations for metadata subclasses.
#define T
#define T1
NVPTX address space definition.
uint64_t High
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DWARF expression.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setApproxFunc(bool B=true)
Definition FMF.h:96
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:246
const Function & getFunction() const
Definition Function.h:166
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Function.cpp:449
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
Argument * getArg(unsigned i) const
Definition Function.h:886
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:629
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
Base class for instruction visitors.
Definition InstVisitor.h:78
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
LLVMContext & getContext() const
Definition Metadata.h:1244
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
A single uniqued string.
Definition Metadata.h:722
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1529
Metadata wrapper in the Value hierarchy.
Definition Metadata.h:184
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition Module.h:117
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition Module.h:138
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition Module.h:120
@ Min
Takes the min of the two values, which are required to be integers.
Definition Module.h:152
@ Max
Takes the max of the two values, which are required to be integers.
Definition Module.h:149
A tuple of MDNodes.
Definition Metadata.h:1760
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
Definition Metadata.h:1856
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
StringRef getTag() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition Type.cpp:895
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
static constexpr size_t npos
Definition StringRef.h:58
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition StringRef.h:844
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:483
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:311
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:287
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:288
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:393
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
bool use_empty() const
Definition Value.h:346
bool hasName() const
Definition Value.h:261
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
LLVM_ABI bool hasStructReturnType(ID id)
Returns true if id has a struct return type.
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
Definition Metadata.h:709
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
@ Length
Definition DWP.cpp:558
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:328
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading, and returns true if it requires upgrading.
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case of optionals) value is accepted.
Definition Casting.h:732
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the function updated as well as all calls updated to the new function.
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition STLExtras.h:633
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address spaces: the instruction is replaced by a pair ptrtoint+inttoptr.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction is replaced by a pair ptrtoint+inttoptr.
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C, pred), C.end());
Definition STLExtras.h:2191
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to the struct-path aware TBAA format.
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker to new module flag format.
@ DEBUG_METADATA_VERSION
Definition Metadata.h:54
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106