//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

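// Move an obsolete intrinsic declaration aside by giving it a ".old" suffix;
// the upgraded declaration is then created under the original name, and calls
// to the ".old" function are rewritten by the intrinsic call upgrade logic.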
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
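// For example:
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)  ; old
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)      ; new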
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
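// For example, the immediate of llvm.x86.sse41.insertps was narrowed:
//   <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) ; old
//   <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8)  ; new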
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
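// For example, llvm.x86.avx512.mask.cmp.ps.512 used to return a scalar i16
// bitmask and now returns a <16 x i1> mask.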
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of multiply and add bytes intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i8.
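// For example (128-bit vpdpbusd):
//   <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>)
// becomes
//   <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <16 x i8>, <16 x i8>)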
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are vectors of i8.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of multiply and add words intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i16.
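// For example (128-bit vpdpwssd):
//   <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>)
// becomes
//   <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <8 x i16>, <8 x i16>)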
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are vectors of i16.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      // Added in 7.0
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MaskedFPCompare(F, ID, NewFn);
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.starts_with("vpdpwssd.") ||
               Name.starts_with("vpdpwssds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
               .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
               .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
               .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
               .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
               .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx512.*'.
  }

  if (Name.consume_front("avx2.")) {
    if (Name.consume_front("vpdpb")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
               .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
               .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
               .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.consume_front("vpdpw")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
               .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
               .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
               .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
               .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx2.*'
  }

  if (Name.consume_front("avx10.")) {
    if (Name.consume_front("vpdpb")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
               .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.consume_front("vpdpw")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
               .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
               .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx10.*'
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic functions. Returns true
// iff an upgrade applies. IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
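    // E.g. <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8 now takes two
    // <4 x bfloat> operands instead of two <8 x i8> operands.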
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases({"v2f32.v8i8", "v4f32.v16i8"},
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to AArch64 Neon or Arm Neon.
  }
  // Continue on to Arm or AArch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

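          // vstN takes (ptr, N vectors, align) and vstNlane takes
          // (ptr, N vectors, lane, align), so the total argument count
          // selects the matching table entry below.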
          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" ||
               Name == "2qa" || Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
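      // E.g. bfloat @llvm.aarch64.neon.bfcvt(float %a) is now emitted as
      //   %r = fptrunc float %a to bfloat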
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
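          // LdRegex guarantees Name begins with '2', '3' or '4', so the
          // leading digit indexes straight into LoadIDs.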
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }

      if (Name.starts_with("rev.nxv")) {
        // 'aarch64.sve.rev.<Ty>'
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
        return true;
      }

      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    // (and we upgrade it to use shared_cluster address-space[AS=7])
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    // The last three parameters of the older version of these
    // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    // The newer version reads as:
    // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    // So, when the type of the [N-3]rd argument is "not i1", then
    // it is the older version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}

static bool convertIntrinsicValidType(StringRef Name,
                                      const FunctionType *FuncTy) {
  Type *HalfTy = Type::getHalfTy(FuncTy->getContext());
  if (Name.starts_with("to.fp16")) {
    return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0),
                                 HalfTy) &&
           CastInst::castIsValid(Instruction::BitCast, HalfTy,
                                 FuncTy->getReturnType());
  }

  if (Name.starts_with("from.fp16")) {
    return CastInst::castIsValid(Instruction::BitCast, FuncTy->getParamType(0),
                                 HalfTy) &&
           CastInst::castIsValid(Instruction::FPExt, HalfTy,
                                 FuncTy->getReturnType());
  }

  return false;
}

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec") ||
            Name.starts_with("cond.sub") || Name.starts_with("csub")) {
          // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
          // and usub_sat so there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      // Legacy wmma iu intrinsics without the optional clamp operand.
      if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
          F->arg_size() == 7) {
        NewFn = nullptr;
        return true;
      }
      if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
          F->arg_size() == 8) {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      if (Name.consume_front("convert.")) {
        if (convertIntrinsicValidType(Name, F->getFunctionType())) {
          NewFn = nullptr;
          return true;
        }
      }

      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .StartsWith("partial.reduce.add",
                          Intrinsic::vector_partial_reduce_add)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert ||
            ID == Intrinsic::vector_partial_reduce_add)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }

      if (Name.consume_front("splice"))
        return true;
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'l':
    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      Intrinsic::ID IID = Name.starts_with("lifetime.start")
                              ? Intrinsic::lifetime_start
                              : Intrinsic::lifetime_end;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                F->getArg(0)->getType());
      return true;
    }
    break;
  case 'm': {
    // Upgrade the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to instead embed the alignment as an attribute of
    // the pointer args.
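    // For example:
    //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 4,
    //                                        i1 false)
    // becomes
    //   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %d, ptr align 4 %s,
    //                                    i64 %n, i1 false)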
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }

    unsigned MaskedID =
        StringSwitch<unsigned>(Name)
            .StartsWith("masked.load", Intrinsic::masked_load)
            .StartsWith("masked.gather", Intrinsic::masked_gather)
            .StartsWith("masked.store", Intrinsic::masked_store)
            .StartsWith("masked.scatter", Intrinsic::masked_scatter)
            .Default(0);
    if (MaskedID && F->arg_size() == 4) {
      rename(F);
      if (MaskedID == Intrinsic::masked_load ||
          MaskedID == Intrinsic::masked_gather) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), MaskedID,
            {F->getReturnType(), F->getArg(0)->getType()});
        return true;
      }
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), MaskedID,
          {F->getArg(0)->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      } else if (F->arg_size() == 2) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
                .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
                .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
                .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // Upgrade Distributed Shared Memory Intrinsics
      Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // Upgrade TMA copy G2S Intrinsics
      IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll,bf16,bf16x2}
        Expand =
            Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
      else if (Name.consume_front("fabs."))
        // nvvm.fabs.{f,ftz.f,d}
        Expand = Name == "f" || Name == "ftz.f" || Name == "d";
      else if (Name.consume_front("ex2.approx."))
        // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
        Expand =
            Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
      else if (Name.consume_front("atomic.load."))
        // nvvm.atomic.load.add.{f32,f64}.p
        // nvvm.atomic.load.{inc,dec}.32.p
        Expand = StringSwitch<bool>(Name)
                     .StartsWith("add.f32.p", true)
                     .StartsWith("add.f64.p", true)
                     .StartsWith("inc.32.p", true)
                     .StartsWith("dec.32.p", true)
                     .Default(false);
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant,param}
        Expand = consumeNVVMPtrAddrSpace(Name);
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant,param}.to.gen
        Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = StringSwitch<bool>(Name)
                     .Case("barrier0", true)
                     .Case("barrier.n", true)
                     .Case("barrier.sync.cnt", true)
                     .Case("barrier.sync", true)
                     .Case("barrier", true)
                     .Case("bar.sync", true)
                     .Case("barrier0.popc", true)
                     .Case("barrier0.and", true)
                     .Case("barrier0.or", true)
                     .Case("clz.ll", true)
                     .Case("popc.ll", true)
                     .Case("h2f", true)
                     .Case("swap.lo.hi.b64", true)
                     .Case("tanh.approx.f32", true)
                     .Default(false);

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
1683 case 'o':
1684 if (Name.starts_with("objectsize.")) {
1685 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1686 if (F->arg_size() == 2 || F->arg_size() == 3) {
1687 rename(F);
1688 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1689 Intrinsic::objectsize, Tys);
1690 return true;
1691 }
1692 }
1693 break;
1694
1695 case 'p':
1696 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1697 rename(F);
1698 NewFn = Intrinsic::getOrInsertDeclaration(
1699 F->getParent(), Intrinsic::ptr_annotation,
1700 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1701 return true;
1702 }
1703 break;
1704
1705 case 'r': {
1706 if (Name.consume_front("riscv.")) {
1707 Intrinsic::ID ID =
1708 StringSwitch<Intrinsic::ID>(Name)
1709 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1710 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1711 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1712 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1713 .Default(Intrinsic::not_intrinsic);
1714 if (ID != Intrinsic::not_intrinsic) {
1715 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1716 rename(F);
1717 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1718 return true;
1719 }
1720 break; // No other applicable upgrades.
1721 }
1722
1723 ID = StringSwitch<Intrinsic::ID>(Name)
1724 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1725 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1726 .Default(Intrinsic::not_intrinsic);
1727 if (ID != Intrinsic::not_intrinsic) {
1728 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1729 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1730 rename(F);
1731 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1732 return true;
1733 }
1734 break; // No other applicable upgrades.
1735 }
1736
1737 ID = StringSwitch<Intrinsic::ID>(Name)
1738 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1739 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1740 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1741 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1742 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1743 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1744 .Default(Intrinsic::not_intrinsic);
1745 if (ID != Intrinsic::not_intrinsic) {
1746 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1747 rename(F);
1748 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1749 return true;
1750 }
1751 break; // No other applicable upgrades.
1752 }
1753
1754 // Replace llvm.riscv.clmul with llvm.clmul.
1755 if (Name == "clmul.i32" || Name == "clmul.i64") {
1756 NewFn = Intrinsic::getOrInsertDeclaration(
1757 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1758 return true;
1759 }
1760
1761 break; // No other 'riscv.*' intrinsics
1762 }
1763 } break;
1764
1765 case 's':
1766 if (Name == "stackprotectorcheck") {
1767 NewFn = nullptr;
1768 return true;
1769 }
1770 break;
1771
1772 case 't':
1773 if (Name == "thread.pointer") {
1774 NewFn = Intrinsic::getOrInsertDeclaration(
1775 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1776 return true;
1777 }
1778 break;
1779
1780 case 'v': {
1781 if (Name == "var.annotation" && F->arg_size() == 4) {
1782 rename(F);
1783 NewFn = Intrinsic::getOrInsertDeclaration(
1784 F->getParent(), Intrinsic::var_annotation,
1785 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1786 return true;
1787 }
1788 if (Name.consume_front("vector.splice")) {
1789 if (Name.starts_with(".left") || Name.starts_with(".right"))
1790 break;
1791 return true;
1792 }
1793 break;
1794 }
1795
1796 case 'w':
1797 if (Name.consume_front("wasm.")) {
1798 Intrinsic::ID ID =
1799 StringSwitch<Intrinsic::ID>(Name)
1800 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1801 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1802 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1803 .Default(Intrinsic::not_intrinsic);
1804 if (ID != Intrinsic::not_intrinsic) {
1805 rename(F);
1806 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1807 F->getReturnType());
1808 return true;
1809 }
1810
1811 if (Name.consume_front("dot.i8x16.i7x16.")) {
1812 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1813 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1814 .Case("add.signed",
1815 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1816 .Default(Intrinsic::not_intrinsic);
1817 if (ID != Intrinsic::not_intrinsic) {
1818 rename(F);
1819 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1820 return true;
1821 }
1822 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1823 }
1824 break; // No other 'wasm.*'.
1825 }
1826 break;
1827
1828 case 'x':
1829 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1830 return true;
1831 }
1832
1833 auto *ST = dyn_cast<StructType>(F->getReturnType());
1834 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1835 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1836 // Replace return type with literal non-packed struct. Only do this for
1837 // intrinsics declared to return a struct, not for intrinsics with
1838 // overloaded return type, in which case the exact struct type will be
1839 // mangled into the name.
1840 SmallVector<Intrinsic::IITDescriptor> Desc;
1841 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1842 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1843 auto *FT = F->getFunctionType();
1844 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1845 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1846 std::string Name = F->getName().str();
1847 rename(F);
1848 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1849 Name, F->getParent());
1850
1851 // The new function may also need remangling.
1852 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1853 NewFn = *Result;
1854 return true;
1855 }
1856 }
1857
1858 // Remangle our intrinsic since we upgrade the mangling
1859 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1860 if (Result != std::nullopt) {
1861 NewFn = *Result;
1862 return true;
1863 }
1864
1865 // This may not belong here. This function is effectively being overloaded
1866 // to both detect an intrinsic which needs upgrading, and to provide the
1867 // upgraded form of the intrinsic. We should perhaps have two separate
1868 // functions for this.
1869 return false;
1870}
1871
1872 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1873 bool CanUpgradeDebugIntrinsicsToRecords) {
1874 NewFn = nullptr;
1875 bool Upgraded =
1876 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1877
1878 // Upgrade intrinsic attributes. This does not change the function.
1879 if (NewFn)
1880 F = NewFn;
1881 if (Intrinsic::ID id = F->getIntrinsicID()) {
1882 // Only do this if the intrinsic signature is valid.
1883 SmallVector<Type *> OverloadTys;
1884 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1885 F->setAttributes(
1886 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1887 }
1888 return Upgraded;
1889}
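#if 0
// Editorial sketch (not part of the original source): how a consumer of old
// bitcode typically drives this entry point -- a hand-rolled, hedged
// equivalent of the UpgradeCallsToIntrinsic() helper declared in
// AutoUpgrade.h. M names the Module being upgraded (an assumption of this
// sketch); UpgradeIntrinsicCall() is the companion API defined later in
// this file.
for (Function &F : llvm::make_early_inc_range(M)) {
  Function *NewFn;
  if (UpgradeIntrinsicFunction(&F, NewFn)) {
    // Rewrite every call site of the old declaration, then drop it.
    for (User *U : llvm::make_early_inc_range(F.users()))
      if (auto *CB = dyn_cast<CallBase>(U))
        UpgradeIntrinsicCall(CB, NewFn);
    if (NewFn && NewFn != &F)
      F.eraseFromParent();
  }
}
#endif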
1890
1891 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1892 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1893 GV->getName() == "llvm.global_dtors")) ||
1894 !GV->hasInitializer())
1895 return nullptr;
1896 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1897 if (!ATy)
1898 return nullptr;
1899 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1900 if (!STy || STy->getNumElements() != 2)
1901 return nullptr;
1902
1903 LLVMContext &C = GV->getContext();
1904 IRBuilder<> IRB(C);
1905 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1906 IRB.getPtrTy());
1907 Constant *Init = GV->getInitializer();
1908 unsigned N = Init->getNumOperands();
1909 std::vector<Constant *> NewCtors(N);
1910 for (unsigned i = 0; i != N; ++i) {
1911 auto Ctor = cast<Constant>(Init->getOperand(i));
1912 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1913 Ctor->getAggregateElement(1),
1914 Constant::getNullValue(IRB.getPtrTy()));
1915 }
1916 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1917
1918 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1919 NewInit, GV->getName());
1920}
1921
1922// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1923// to byte shuffles.
1924 static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1925 unsigned Shift) {
1926 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1927 unsigned NumElts = ResultTy->getNumElements() * 8;
1928
1929 // Bitcast from a 64-bit element type to a byte element type.
1930 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1931 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1932
1933 // We'll be shuffling in zeroes.
1934 Value *Res = Constant::getNullValue(VecTy);
1935
1936 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1937 // we'll just return the zero vector.
1938 if (Shift < 16) {
1939 int Idxs[64];
1940 // 256/512-bit version is split into 2/4 16-byte lanes.
1941 for (unsigned l = 0; l != NumElts; l += 16)
1942 for (unsigned i = 0; i != 16; ++i) {
1943 unsigned Idx = NumElts + i - Shift;
1944 if (Idx < NumElts)
1945 Idx -= NumElts - 16; // end of lane, switch operand.
1946 Idxs[l + i] = Idx + l;
1947 }
1948
1949 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1950 }
1951
1952 // Bitcast back to a 64-bit element type.
1953 return Builder.CreateBitCast(Res, ResultTy, "cast");
1954}
1955
1956// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1957// to byte shuffles.
1958 static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1959 unsigned Shift) {
1960 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1961 unsigned NumElts = ResultTy->getNumElements() * 8;
1962
1963 // Bitcast from a 64-bit element type to a byte element type.
1964 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1965 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1966
1967 // We'll be shuffling in zeroes.
1968 Value *Res = Constant::getNullValue(VecTy);
1969
1970 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1971 // we'll just return the zero vector.
1972 if (Shift < 16) {
1973 int Idxs[64];
1974 // 256/512-bit version is split into 2/4 16-byte lanes.
1975 for (unsigned l = 0; l != NumElts; l += 16)
1976 for (unsigned i = 0; i != 16; ++i) {
1977 unsigned Idx = i + Shift;
1978 if (Idx >= 16)
1979 Idx += NumElts - 16; // end of lane, switch operand.
1980 Idxs[l + i] = Idx + l;
1981 }
1982
1983 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1984 }
1985
1986 // Bitcast back to a 64-bit element type.
1987 return Builder.CreateBitCast(Res, ResultTy, "cast");
1988}
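#if 0
// Editorial sketch (not part of the original source): the shuffle-index
// computation shared by the PSLLDQ/PSRLDQ upgrades above, lifted into a
// standalone program so the lane-splitting logic can be inspected on its
// own. NumElts and Shift mirror the parameters of the PSRLDQ variant; with
// NumElts == 32 (a 256-bit vector of i8), any index >= NumElts selects from
// the all-zero second shuffle operand, which is how zero bytes are pulled
// in at each 16-byte lane boundary.
#include <cstdio>
int main() {
  unsigned NumElts = 32, Shift = 5;
  int Idxs[64];
  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = i + Shift;  // PSRLDQ: shift bytes toward element 0.
      if (Idx >= 16)
        Idx += NumElts - 16;     // Past lane end: take a zero element.
      Idxs[l + i] = Idx + l;
    }
  for (unsigned i = 0; i != NumElts; ++i)
    std::printf("%d ", Idxs[i]);
  std::printf("\n");
  return 0;
}
#endif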
1989
1990static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1991 unsigned NumElts) {
1992 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1993 Type *MaskTy = FixedVectorType::get(
1994 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1995 Mask = Builder.CreateBitCast(Mask, MaskTy);
1996
1997 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1998 // i8 and we need to extract down to the right number of elements.
1999 if (NumElts <= 4) {
2000 int Indices[4];
2001 for (unsigned i = 0; i != NumElts; ++i)
2002 Indices[i] = i;
2003 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
2004 "extract");
2005 }
2006
2007 return Mask;
2008}
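 // Editorial illustration (not from the original source): for a 4-element
 // operation with an i8 mask value of 0b00000101, the bitcast yields
 // <8 x i1> <1,0,1,0,0,0,0,0> and the shuffle keeps only the low four
 // lanes, i.e. <4 x i1> <1,0,1,0>.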
2009
2010static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2011 Value *Op1) {
2012 // If the mask is all ones just emit the first operation.
2013 if (const auto *C = dyn_cast<Constant>(Mask))
2014 if (C->isAllOnesValue())
2015 return Op0;
2016
2017 Mask = getX86MaskVec(Builder, Mask,
2018 cast<FixedVectorType>(Op0->getType())->getNumElements());
2019 return Builder.CreateSelect(Mask, Op0, Op1);
2020}
2021
2022static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2023 Value *Op1) {
2024 // If the mask is all ones just emit the first operation.
2025 if (const auto *C = dyn_cast<Constant>(Mask))
2026 if (C->isAllOnesValue())
2027 return Op0;
2028
2029 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2030 Mask->getType()->getIntegerBitWidth());
2031 Mask = Builder.CreateBitCast(Mask, MaskTy);
2032 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2033 return Builder.CreateSelect(Mask, Op0, Op1);
2034}
2035
2036// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2037// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2038// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
2039 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
2040 Value *Op1, Value *Shift,
2041 Value *Passthru, Value *Mask,
2042 bool IsVALIGN) {
2043 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2044
2045 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2046 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2047 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2048 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2049
2050 // Mask the immediate for VALIGN.
2051 if (IsVALIGN)
2052 ShiftVal &= (NumElts - 1);
2053
2054 // If palignr is shifting the pair of vectors more than the size of two
2055 // lanes, emit zero.
2056 if (ShiftVal >= 32)
2057 return llvm::Constant::getNullValue(Op0->getType());
2058
2059 // If palignr is shifting the pair of input vectors more than one lane,
2060 // but less than two lanes, convert to shifting in zeroes.
2061 if (ShiftVal > 16) {
2062 ShiftVal -= 16;
2063 Op1 = Op0;
2064 Op0 = llvm::Constant::getNullValue(Op0->getType());
2065 }
2066
2067 int Indices[64];
2068 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2069 for (unsigned l = 0; l < NumElts; l += 16) {
2070 for (unsigned i = 0; i != 16; ++i) {
2071 unsigned Idx = ShiftVal + i;
2072 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2073 Idx += NumElts - 16; // End of lane, switch operand.
2074 Indices[l + i] = Idx + l;
2075 }
2076 }
2077
2078 Value *Align = Builder.CreateShuffleVector(
2079 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2080
2081 return emitX86Select(Builder, Mask, Align, Passthru);
2082}
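 // Editorial illustration (not from the original source): for 128-bit
 // palignr with ShiftVal == 4, the shuffle selects bytes 4..15 of Op1
 // followed by bytes 0..3 of Op0, i.e. the concatenation Op0:Op1 shifted
 // right by four bytes. ShiftVal >= 32 yields all zeroes, and for
 // 16 < ShiftVal < 32 Op0 is first replaced by zeroes so the shuffle
 // shifts in zero bytes instead.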
2083
2084 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
2085 bool ZeroMask, bool IndexForm) {
2086 Type *Ty = CI.getType();
2087 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2088 unsigned EltWidth = Ty->getScalarSizeInBits();
2089 bool IsFloat = Ty->isFPOrFPVectorTy();
2090 Intrinsic::ID IID;
2091 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2092 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2093 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2094 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2095 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2096 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2097 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2098 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2099 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2100 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2101 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2102 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2103 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2104 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2105 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2106 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2107 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2108 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2109 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2110 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2111 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2112 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2113 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2114 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2115 else if (VecWidth == 128 && EltWidth == 16)
2116 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2117 else if (VecWidth == 256 && EltWidth == 16)
2118 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2119 else if (VecWidth == 512 && EltWidth == 16)
2120 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2121 else if (VecWidth == 128 && EltWidth == 8)
2122 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2123 else if (VecWidth == 256 && EltWidth == 8)
2124 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2125 else if (VecWidth == 512 && EltWidth == 8)
2126 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2127 else
2128 llvm_unreachable("Unexpected intrinsic");
2129
2130 Value *Args[] = { CI.getArgOperand(0), CI.getArgOperand(1),
2131 CI.getArgOperand(2) };
2132
2133 // If this isn't index form we need to swap operand 0 and 1.
2134 if (!IndexForm)
2135 std::swap(Args[0], Args[1]);
2136
2137 Value *V = Builder.CreateIntrinsic(IID, Args);
2138 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2139 : Builder.CreateBitCast(CI.getArgOperand(1),
2140 Ty);
2141 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2142}
2143
2144 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2145 Intrinsic::ID IID) {
2146 Type *Ty = CI.getType();
2147 Value *Op0 = CI.getOperand(0);
2148 Value *Op1 = CI.getOperand(1);
2149 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2150
2151 if (CI.arg_size() == 4) { // For masked intrinsics.
2152 Value *VecSrc = CI.getOperand(2);
2153 Value *Mask = CI.getOperand(3);
2154 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2155 }
2156 return Res;
2157}
2158
2159 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2160 bool IsRotateRight) {
2161 Type *Ty = CI.getType();
2162 Value *Src = CI.getArgOperand(0);
2163 Value *Amt = CI.getArgOperand(1);
2164
2165 // Amount may be scalar immediate, in which case create a splat vector.
2166 // Funnel shift amounts are treated as modulo and types are all power-of-2, so
2167 // we only care about the lowest log2 bits anyway.
2168 if (Amt->getType() != Ty) {
2169 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2170 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2171 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2172 }
2173
2174 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2175 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2176
2177 if (CI.arg_size() == 4) { // For masked intrinsics.
2178 Value *VecSrc = CI.getOperand(2);
2179 Value *Mask = CI.getOperand(3);
2180 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2181 }
2182 return Res;
2183}
2184
2185static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2186 bool IsSigned) {
2187 Type *Ty = CI.getType();
2188 Value *LHS = CI.getArgOperand(0);
2189 Value *RHS = CI.getArgOperand(1);
2190
2191 CmpInst::Predicate Pred;
2192 switch (Imm) {
2193 case 0x0:
2194 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2195 break;
2196 case 0x1:
2197 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2198 break;
2199 case 0x2:
2200 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2201 break;
2202 case 0x3:
2203 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2204 break;
2205 case 0x4:
2206 Pred = ICmpInst::ICMP_EQ;
2207 break;
2208 case 0x5:
2209 Pred = ICmpInst::ICMP_NE;
2210 break;
2211 case 0x6:
2212 return Constant::getNullValue(Ty); // FALSE
2213 case 0x7:
2214 return Constant::getAllOnesValue(Ty); // TRUE
2215 default:
2216 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2217 }
2218
2219 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2220 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2221 return Ext;
2222}
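 // Editorial illustration (not from the original source): e.g. the XOP
 // intrinsic @llvm.x86.xop.vpcomltb (Imm == 0x0, signed) becomes
 //   %c = icmp slt <16 x i8> %a, %b
 //   %r = sext <16 x i1> %c to <16 x i8>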
2223
2224 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2225 bool IsShiftRight, bool ZeroMask) {
2226 Type *Ty = CI.getType();
2227 Value *Op0 = CI.getArgOperand(0);
2228 Value *Op1 = CI.getArgOperand(1);
2229 Value *Amt = CI.getArgOperand(2);
2230
2231 if (IsShiftRight)
2232 std::swap(Op0, Op1);
2233
2234 // Amount may be scalar immediate, in which case create a splat vector.
2235 // Funnel shift amounts are treated as modulo and types are all power-of-2, so
2236 // we only care about the lowest log2 bits anyway.
2237 if (Amt->getType() != Ty) {
2238 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2239 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2240 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2241 }
2242
2243 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2244 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2245
2246 unsigned NumArgs = CI.arg_size();
2247 if (NumArgs >= 4) { // For masked intrinsics.
2248 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2249 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2250 CI.getArgOperand(0);
2251 Value *Mask = CI.getOperand(NumArgs - 1);
2252 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2253 }
2254 return Res;
2255}
2256
2257 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2258 Value *Mask, bool Aligned) {
2259 const Align Alignment =
2260 Aligned
2261 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2262 : Align(1);
2263
2264 // If the mask is all ones just emit a regular store.
2265 if (const auto *C = dyn_cast<Constant>(Mask))
2266 if (C->isAllOnesValue())
2267 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2268
2269 // Convert the mask from an integer type to a vector of i1.
2270 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2271 Mask = getX86MaskVec(Builder, Mask, NumElts);
2272 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2273}
2274
2275 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2276 Value *Passthru, Value *Mask, bool Aligned) {
2277 Type *ValTy = Passthru->getType();
2278 const Align Alignment =
2279 Aligned
2280 ? Align(
2281 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
2282 8)
2283 : Align(1);
2284
2285 // If the mask is all ones just emit a regular load.
2286 if (const auto *C = dyn_cast<Constant>(Mask))
2287 if (C->isAllOnesValue())
2288 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2289
2290 // Convert the mask from an integer type to a vector of i1.
2291 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2292 Mask = getX86MaskVec(Builder, Mask, NumElts);
2293 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2294}
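 // Editorial illustration (not from the original source): e.g. the aligned
 // form @llvm.x86.avx512.mask.load.ps.128(ptr %p, <4 x float> %src, i8 %m)
 // becomes an @llvm.masked.load of <4 x float> from %p with alignment 16,
 // a <4 x i1> mask derived from %m, and %src as the passthru operand.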
2295
2296static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2297 Type *Ty = CI.getType();
2298 Value *Op0 = CI.getArgOperand(0);
2299 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2300 {Op0, Builder.getInt1(false)});
2301 if (CI.arg_size() == 3)
2302 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2303 return Res;
2304}
2305
2306static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2307 Type *Ty = CI.getType();
2308
2309 // Arguments have a vXi32 type so cast to vXi64.
2310 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2311 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2312
2313 if (IsSigned) {
2314 // Shift left then arithmetic shift right.
2315 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2316 LHS = Builder.CreateShl(LHS, ShiftAmt);
2317 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2318 RHS = Builder.CreateShl(RHS, ShiftAmt);
2319 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2320 } else {
2321 // Clear the upper bits.
2322 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2323 LHS = Builder.CreateAnd(LHS, Mask);
2324 RHS = Builder.CreateAnd(RHS, Mask);
2325 }
2326
2327 Value *Res = Builder.CreateMul(LHS, RHS);
2328
2329 if (CI.arg_size() == 4)
2330 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2331
2332 return Res;
2333}
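#if 0
// Editorial sketch (not part of the original source): the scalar analogue
// of the PMULDQ/PMULUDQ lowering above. Each vXi64 element really holds a
// 32-bit operand; the signed form re-extends it with shl/ashr, the unsigned
// form simply masks off the upper half. (The cast relies on two's-complement
// conversion, well-defined since C++20 and universal in practice.)
#include <cstdint>
#include <cstdio>
int main() {
  uint64_t Raw = 0xdeadbeeffffffffbULL; // low 32 bits hold the real operand
  int64_t Signed = (int64_t)(Raw << 32) >> 32; // shl then ashr: -5
  uint64_t Unsigned = Raw & 0xffffffffULL;     // mask: 4294967291
  std::printf("%lld %llu\n", (long long)Signed, (unsigned long long)Unsigned);
  return 0;
}
#endif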
2334
2335 // Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
2336 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2337 Value *Mask) {
2338 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2339 if (Mask) {
2340 const auto *C = dyn_cast<Constant>(Mask);
2341 if (!C || !C->isAllOnesValue())
2342 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2343 }
2344
2345 if (NumElts < 8) {
2346 int Indices[8];
2347 for (unsigned i = 0; i != NumElts; ++i)
2348 Indices[i] = i;
2349 for (unsigned i = NumElts; i != 8; ++i)
2350 Indices[i] = NumElts + i % NumElts;
2351 Vec = Builder.CreateShuffleVector(Vec,
2352 Constant::getNullValue(Vec->getType()),
2353 Indices);
2354 }
2355 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2356}
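 // Editorial illustration (not from the original source): a <4 x i1>
 // compare result is widened to <8 x i1> by shuffling in zero elements
 // before the bitcast, so the scalar mask handed back is always at least an
 // i8 -- e.g. <1,0,1,0> becomes <1,0,1,0,0,0,0,0>, i.e. the i8 value 5.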
2357
2358 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2359 unsigned CC, bool Signed) {
2360 Value *Op0 = CI.getArgOperand(0);
2361 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2362
2363 Value *Cmp;
2364 if (CC == 3) {
2365 Cmp = Constant::getNullValue(
2366 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2367 } else if (CC == 7) {
2368 Cmp = Constant::getAllOnesValue(
2369 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2370 } else {
2371 ICmpInst::Predicate Pred;
2372 switch (CC) {
2373 default: llvm_unreachable("Unknown condition code");
2374 case 0: Pred = ICmpInst::ICMP_EQ; break;
2375 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2376 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2377 case 4: Pred = ICmpInst::ICMP_NE; break;
2378 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2379 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2380 }
2381 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2382 }
2383
2384 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2385
2386 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2387}
2388
2389// Replace a masked intrinsic with an older unmasked intrinsic.
2390 static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2391 Intrinsic::ID IID) {
2392 Value *Rep =
2393 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2394 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2395}
2396
2397 static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2398 Value* A = CI.getArgOperand(0);
2399 Value* B = CI.getArgOperand(1);
2400 Value* Src = CI.getArgOperand(2);
2401 Value* Mask = CI.getArgOperand(3);
2402
2403 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2404 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2405 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2406 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2407 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2408 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2409}
2410
2411 static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2412 Value* Op = CI.getArgOperand(0);
2413 Type* ReturnOp = CI.getType();
2414 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2415 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2416 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2417}
2418
2419// Replace intrinsic with unmasked version and a select.
2420 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2421 CallBase &CI, Value *&Rep) {
2422 Name = Name.substr(12); // Remove avx512.mask.
2423
2424 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2425 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2426 Intrinsic::ID IID;
2427 if (Name.starts_with("max.p")) {
2428 if (VecWidth == 128 && EltWidth == 32)
2429 IID = Intrinsic::x86_sse_max_ps;
2430 else if (VecWidth == 128 && EltWidth == 64)
2431 IID = Intrinsic::x86_sse2_max_pd;
2432 else if (VecWidth == 256 && EltWidth == 32)
2433 IID = Intrinsic::x86_avx_max_ps_256;
2434 else if (VecWidth == 256 && EltWidth == 64)
2435 IID = Intrinsic::x86_avx_max_pd_256;
2436 else
2437 llvm_unreachable("Unexpected intrinsic");
2438 } else if (Name.starts_with("min.p")) {
2439 if (VecWidth == 128 && EltWidth == 32)
2440 IID = Intrinsic::x86_sse_min_ps;
2441 else if (VecWidth == 128 && EltWidth == 64)
2442 IID = Intrinsic::x86_sse2_min_pd;
2443 else if (VecWidth == 256 && EltWidth == 32)
2444 IID = Intrinsic::x86_avx_min_ps_256;
2445 else if (VecWidth == 256 && EltWidth == 64)
2446 IID = Intrinsic::x86_avx_min_pd_256;
2447 else
2448 llvm_unreachable("Unexpected intrinsic");
2449 } else if (Name.starts_with("pshuf.b.")) {
2450 if (VecWidth == 128)
2451 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2452 else if (VecWidth == 256)
2453 IID = Intrinsic::x86_avx2_pshuf_b;
2454 else if (VecWidth == 512)
2455 IID = Intrinsic::x86_avx512_pshuf_b_512;
2456 else
2457 llvm_unreachable("Unexpected intrinsic");
2458 } else if (Name.starts_with("pmul.hr.sw.")) {
2459 if (VecWidth == 128)
2460 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2461 else if (VecWidth == 256)
2462 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2463 else if (VecWidth == 512)
2464 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2465 else
2466 llvm_unreachable("Unexpected intrinsic");
2467 } else if (Name.starts_with("pmulh.w.")) {
2468 if (VecWidth == 128)
2469 IID = Intrinsic::x86_sse2_pmulh_w;
2470 else if (VecWidth == 256)
2471 IID = Intrinsic::x86_avx2_pmulh_w;
2472 else if (VecWidth == 512)
2473 IID = Intrinsic::x86_avx512_pmulh_w_512;
2474 else
2475 llvm_unreachable("Unexpected intrinsic");
2476 } else if (Name.starts_with("pmulhu.w.")) {
2477 if (VecWidth == 128)
2478 IID = Intrinsic::x86_sse2_pmulhu_w;
2479 else if (VecWidth == 256)
2480 IID = Intrinsic::x86_avx2_pmulhu_w;
2481 else if (VecWidth == 512)
2482 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2483 else
2484 llvm_unreachable("Unexpected intrinsic");
2485 } else if (Name.starts_with("pmaddw.d.")) {
2486 if (VecWidth == 128)
2487 IID = Intrinsic::x86_sse2_pmadd_wd;
2488 else if (VecWidth == 256)
2489 IID = Intrinsic::x86_avx2_pmadd_wd;
2490 else if (VecWidth == 512)
2491 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2492 else
2493 llvm_unreachable("Unexpected intrinsic");
2494 } else if (Name.starts_with("pmaddubs.w.")) {
2495 if (VecWidth == 128)
2496 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2497 else if (VecWidth == 256)
2498 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2499 else if (VecWidth == 512)
2500 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2501 else
2502 llvm_unreachable("Unexpected intrinsic");
2503 } else if (Name.starts_with("packsswb.")) {
2504 if (VecWidth == 128)
2505 IID = Intrinsic::x86_sse2_packsswb_128;
2506 else if (VecWidth == 256)
2507 IID = Intrinsic::x86_avx2_packsswb;
2508 else if (VecWidth == 512)
2509 IID = Intrinsic::x86_avx512_packsswb_512;
2510 else
2511 llvm_unreachable("Unexpected intrinsic");
2512 } else if (Name.starts_with("packssdw.")) {
2513 if (VecWidth == 128)
2514 IID = Intrinsic::x86_sse2_packssdw_128;
2515 else if (VecWidth == 256)
2516 IID = Intrinsic::x86_avx2_packssdw;
2517 else if (VecWidth == 512)
2518 IID = Intrinsic::x86_avx512_packssdw_512;
2519 else
2520 llvm_unreachable("Unexpected intrinsic");
2521 } else if (Name.starts_with("packuswb.")) {
2522 if (VecWidth == 128)
2523 IID = Intrinsic::x86_sse2_packuswb_128;
2524 else if (VecWidth == 256)
2525 IID = Intrinsic::x86_avx2_packuswb;
2526 else if (VecWidth == 512)
2527 IID = Intrinsic::x86_avx512_packuswb_512;
2528 else
2529 llvm_unreachable("Unexpected intrinsic");
2530 } else if (Name.starts_with("packusdw.")) {
2531 if (VecWidth == 128)
2532 IID = Intrinsic::x86_sse41_packusdw;
2533 else if (VecWidth == 256)
2534 IID = Intrinsic::x86_avx2_packusdw;
2535 else if (VecWidth == 512)
2536 IID = Intrinsic::x86_avx512_packusdw_512;
2537 else
2538 llvm_unreachable("Unexpected intrinsic");
2539 } else if (Name.starts_with("vpermilvar.")) {
2540 if (VecWidth == 128 && EltWidth == 32)
2541 IID = Intrinsic::x86_avx_vpermilvar_ps;
2542 else if (VecWidth == 128 && EltWidth == 64)
2543 IID = Intrinsic::x86_avx_vpermilvar_pd;
2544 else if (VecWidth == 256 && EltWidth == 32)
2545 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2546 else if (VecWidth == 256 && EltWidth == 64)
2547 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2548 else if (VecWidth == 512 && EltWidth == 32)
2549 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2550 else if (VecWidth == 512 && EltWidth == 64)
2551 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2552 else
2553 llvm_unreachable("Unexpected intrinsic");
2554 } else if (Name == "cvtpd2dq.256") {
2555 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2556 } else if (Name == "cvtpd2ps.256") {
2557 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2558 } else if (Name == "cvttpd2dq.256") {
2559 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2560 } else if (Name == "cvttps2dq.128") {
2561 IID = Intrinsic::x86_sse2_cvttps2dq;
2562 } else if (Name == "cvttps2dq.256") {
2563 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2564 } else if (Name.starts_with("permvar.")) {
2565 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2566 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2567 IID = Intrinsic::x86_avx2_permps;
2568 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2569 IID = Intrinsic::x86_avx2_permd;
2570 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2571 IID = Intrinsic::x86_avx512_permvar_df_256;
2572 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2573 IID = Intrinsic::x86_avx512_permvar_di_256;
2574 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2575 IID = Intrinsic::x86_avx512_permvar_sf_512;
2576 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2577 IID = Intrinsic::x86_avx512_permvar_si_512;
2578 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2579 IID = Intrinsic::x86_avx512_permvar_df_512;
2580 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2581 IID = Intrinsic::x86_avx512_permvar_di_512;
2582 else if (VecWidth == 128 && EltWidth == 16)
2583 IID = Intrinsic::x86_avx512_permvar_hi_128;
2584 else if (VecWidth == 256 && EltWidth == 16)
2585 IID = Intrinsic::x86_avx512_permvar_hi_256;
2586 else if (VecWidth == 512 && EltWidth == 16)
2587 IID = Intrinsic::x86_avx512_permvar_hi_512;
2588 else if (VecWidth == 128 && EltWidth == 8)
2589 IID = Intrinsic::x86_avx512_permvar_qi_128;
2590 else if (VecWidth == 256 && EltWidth == 8)
2591 IID = Intrinsic::x86_avx512_permvar_qi_256;
2592 else if (VecWidth == 512 && EltWidth == 8)
2593 IID = Intrinsic::x86_avx512_permvar_qi_512;
2594 else
2595 llvm_unreachable("Unexpected intrinsic");
2596 } else if (Name.starts_with("dbpsadbw.")) {
2597 if (VecWidth == 128)
2598 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2599 else if (VecWidth == 256)
2600 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2601 else if (VecWidth == 512)
2602 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2603 else
2604 llvm_unreachable("Unexpected intrinsic");
2605 } else if (Name.starts_with("pmultishift.qb.")) {
2606 if (VecWidth == 128)
2607 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2608 else if (VecWidth == 256)
2609 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2610 else if (VecWidth == 512)
2611 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2612 else
2613 llvm_unreachable("Unexpected intrinsic");
2614 } else if (Name.starts_with("conflict.")) {
2615 if (Name[9] == 'd' && VecWidth == 128)
2616 IID = Intrinsic::x86_avx512_conflict_d_128;
2617 else if (Name[9] == 'd' && VecWidth == 256)
2618 IID = Intrinsic::x86_avx512_conflict_d_256;
2619 else if (Name[9] == 'd' && VecWidth == 512)
2620 IID = Intrinsic::x86_avx512_conflict_d_512;
2621 else if (Name[9] == 'q' && VecWidth == 128)
2622 IID = Intrinsic::x86_avx512_conflict_q_128;
2623 else if (Name[9] == 'q' && VecWidth == 256)
2624 IID = Intrinsic::x86_avx512_conflict_q_256;
2625 else if (Name[9] == 'q' && VecWidth == 512)
2626 IID = Intrinsic::x86_avx512_conflict_q_512;
2627 else
2628 llvm_unreachable("Unexpected intrinsic");
2629 } else if (Name.starts_with("pavg.")) {
2630 if (Name[5] == 'b' && VecWidth == 128)
2631 IID = Intrinsic::x86_sse2_pavg_b;
2632 else if (Name[5] == 'b' && VecWidth == 256)
2633 IID = Intrinsic::x86_avx2_pavg_b;
2634 else if (Name[5] == 'b' && VecWidth == 512)
2635 IID = Intrinsic::x86_avx512_pavg_b_512;
2636 else if (Name[5] == 'w' && VecWidth == 128)
2637 IID = Intrinsic::x86_sse2_pavg_w;
2638 else if (Name[5] == 'w' && VecWidth == 256)
2639 IID = Intrinsic::x86_avx2_pavg_w;
2640 else if (Name[5] == 'w' && VecWidth == 512)
2641 IID = Intrinsic::x86_avx512_pavg_w_512;
2642 else
2643 llvm_unreachable("Unexpected intrinsic");
2644 } else
2645 return false;
2646
2647 SmallVector<Value *, 4> Args(CI.args());
2648 Args.pop_back();
2649 Args.pop_back();
2650 Rep = Builder.CreateIntrinsic(IID, Args);
2651 unsigned NumArgs = CI.arg_size();
2652 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2653 CI.getArgOperand(NumArgs - 2));
2654 return true;
2655}
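 // Editorial illustration (not from the original source): e.g.
 //   @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a, <4 x float> %b,
 //                                    <4 x float> %src, i8 %m)
 // becomes the unmasked @llvm.x86.sse.max.ps(%a, %b) followed by a select
 // between the result and %src on the <4 x i1> form of %m.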
2656
2657 /// Upgrade the comment in a call to inline asm that represents an ObjC
2658 /// retain/release marker.
2659void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2660 size_t Pos;
2661 if (AsmStr->find("mov\tfp") == 0 &&
2662 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2663 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2664 AsmStr->replace(Pos, 1, ";");
2665 }
2666}
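 // Editorial illustration (not from the original source): an old marker
 //   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
 // becomes
 //   "mov\tfp, fp\t\t; marker for objc_retainAutoreleaseReturnValue"
 // -- only the '#' is replaced, turning the trailer into an asm comment.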
2667
2668 static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2669 Function *F, IRBuilder<> &Builder) {
2670 Value *Rep = nullptr;
2671
2672 if (Name == "abs.i" || Name == "abs.ll") {
2673 Value *Arg = CI->getArgOperand(0);
2674 Value *Neg = Builder.CreateNeg(Arg, "neg");
2675 Value *Cmp = Builder.CreateICmpSGE(
2676 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2677 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2678 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2679 Type *Ty = (Name == "abs.bf16")
2680 ? Builder.getBFloatTy()
2681 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2682 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2683 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2684 Rep = Builder.CreateBitCast(Abs, CI->getType());
2685 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2686 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2687 : Intrinsic::nvvm_fabs;
2688 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2689 } else if (Name.consume_front("ex2.approx.")) {
2690 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2691 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2692 : Intrinsic::nvvm_ex2_approx;
2693 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2694 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2695 Name.starts_with("atomic.load.add.f64.p")) {
2696 Value *Ptr = CI->getArgOperand(0);
2697 Value *Val = CI->getArgOperand(1);
2698 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2699 AtomicOrdering::SequentiallyConsistent);
2700 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2701 Name.starts_with("atomic.load.dec.32.p")) {
2702 Value *Ptr = CI->getArgOperand(0);
2703 Value *Val = CI->getArgOperand(1);
2704 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2705 : AtomicRMWInst::UDecWrap;
2706 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2707 AtomicOrdering::SequentiallyConsistent);
2708 } else if (Name == "clz.ll") {
2709 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2710 Value *Arg = CI->getArgOperand(0);
2711 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2712 {Arg, Builder.getFalse()},
2713 /*FMFSource=*/nullptr, "ctlz");
2714 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2715 } else if (Name == "popc.ll") {
2716 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2717 // i64.
2718 Value *Arg = CI->getArgOperand(0);
2719 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2720 Arg, /*FMFSource=*/nullptr, "ctpop");
2721 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2722 } else if (Name == "h2f") {
2723 Value *Cast =
2724 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
2725 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2726 } else if (Name.consume_front("bitcast.") &&
2727 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2728 Name == "d2ll")) {
2729 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2730 } else if (Name == "rotate.b32") {
2731 Value *Arg = CI->getOperand(0);
2732 Value *ShiftAmt = CI->getOperand(1);
2733 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2734 {Arg, Arg, ShiftAmt});
2735 } else if (Name == "rotate.b64") {
2736 Type *Int64Ty = Builder.getInt64Ty();
2737 Value *Arg = CI->getOperand(0);
2738 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2739 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2740 {Arg, Arg, ZExtShiftAmt});
2741 } else if (Name == "rotate.right.b64") {
2742 Type *Int64Ty = Builder.getInt64Ty();
2743 Value *Arg = CI->getOperand(0);
2744 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2745 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2746 {Arg, Arg, ZExtShiftAmt});
2747 } else if (Name == "swap.lo.hi.b64") {
2748 Type *Int64Ty = Builder.getInt64Ty();
2749 Value *Arg = CI->getOperand(0);
2750 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2751 {Arg, Arg, Builder.getInt64(32)});
2752 } else if ((Name.consume_front("ptr.gen.to.") &&
2753 consumeNVVMPtrAddrSpace(Name)) ||
2754 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2755 Name.starts_with(".to.gen"))) {
2756 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2757 } else if (Name.consume_front("ldg.global")) {
2758 Value *Ptr = CI->getArgOperand(0);
2759 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2760 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2761 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2762 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2763 MDNode *MD = MDNode::get(Builder.getContext(), {});
2764 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2765 return LD;
2766 } else if (Name == "tanh.approx.f32") {
2767 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2768 FastMathFlags FMF;
2769 FMF.setApproxFunc();
2770 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2771 FMF);
2772 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2773 Value *Arg =
2774 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2775 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2776 {}, {Arg});
2777 } else if (Name == "barrier") {
2778 Rep = Builder.CreateIntrinsic(
2779 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2780 {CI->getArgOperand(0), CI->getArgOperand(1)});
2781 } else if (Name == "barrier.sync") {
2782 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2783 {CI->getArgOperand(0)});
2784 } else if (Name == "barrier.sync.cnt") {
2785 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2786 {CI->getArgOperand(0), CI->getArgOperand(1)});
2787 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2788 Name == "barrier0.or") {
2789 Value *C = CI->getArgOperand(0);
2790 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2791
2792 Intrinsic::ID IID =
2793 StringSwitch<Intrinsic::ID>(Name)
2794 .Case("barrier0.popc",
2795 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2796 .Case("barrier0.and",
2797 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2798 .Case("barrier0.or",
2799 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2800 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2801 Rep = Builder.CreateZExt(Bar, CI->getType());
2802 } else {
2803 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2804 if (IID != Intrinsic::not_intrinsic &&
2805 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2806 rename(F);
2807 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2808 SmallVector<Value *, 2> Args;
2809 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2810 Value *Arg = CI->getArgOperand(I);
2811 Type *OldType = Arg->getType();
2812 Type *NewType = NewFn->getArg(I)->getType();
2813 Args.push_back(
2814 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2815 ? Builder.CreateBitCast(Arg, NewType)
2816 : Arg);
2817 }
2818 Rep = Builder.CreateCall(NewFn, Args);
2819 if (F->getReturnType()->isIntegerTy())
2820 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2821 }
2822 }
2823
2824 return Rep;
2825}
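 // Editorial illustration (not from the original source): e.g.
 //   %r = call i32 @llvm.nvvm.rotate.b32(i32 %x, i32 %n)
 // becomes the funnel-shift idiom
 //   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)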
2826
2827 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2828 IRBuilder<> &Builder) {
2829 LLVMContext &C = F->getContext();
2830 Value *Rep = nullptr;
2831
2832 if (Name.starts_with("sse4a.movnt.")) {
2833 SmallVector<Metadata *, 1> Elts;
2834 Elts.push_back(
2835 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2836 MDNode *Node = MDNode::get(C, Elts);
2837
2838 Value *Arg0 = CI->getArgOperand(0);
2839 Value *Arg1 = CI->getArgOperand(1);
2840
2841 // Nontemporal (unaligned) store of the 0'th element of the float/double
2842 // vector.
2843 Value *Extract =
2844 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2845
2846 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2847 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2848 } else if (Name.starts_with("avx.movnt.") ||
2849 Name.starts_with("avx512.storent.")) {
2850 SmallVector<Metadata *, 1> Elts;
2851 Elts.push_back(
2852 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2853 MDNode *Node = MDNode::get(C, Elts);
2854
2855 Value *Arg0 = CI->getArgOperand(0);
2856 Value *Arg1 = CI->getArgOperand(1);
2857
2858 StoreInst *SI = Builder.CreateAlignedStore(
2859 Arg1, Arg0,
2860 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2861 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2862 } else if (Name == "sse2.storel.dq") {
2863 Value *Arg0 = CI->getArgOperand(0);
2864 Value *Arg1 = CI->getArgOperand(1);
2865
2866 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2867 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2868 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2869 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2870 } else if (Name.starts_with("sse.storeu.") ||
2871 Name.starts_with("sse2.storeu.") ||
2872 Name.starts_with("avx.storeu.")) {
2873 Value *Arg0 = CI->getArgOperand(0);
2874 Value *Arg1 = CI->getArgOperand(1);
2875 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2876 } else if (Name == "avx512.mask.store.ss") {
2877 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2878 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2879 Mask, false);
2880 } else if (Name.starts_with("avx512.mask.store")) {
2881 // "avx512.mask.storeu." or "avx512.mask.store."
2882 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2883 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2884 CI->getArgOperand(2), Aligned);
2885 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2886 // Upgrade packed integer vector compare intrinsics to compare instructions.
2887 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2888 bool CmpEq = Name[9] == 'e';
2889 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2890 CI->getArgOperand(0), CI->getArgOperand(1));
2891 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2892 } else if (Name.starts_with("avx512.broadcastm")) {
2893 Type *ExtTy = Type::getInt32Ty(C);
2894 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2895 ExtTy = Type::getInt64Ty(C);
2896 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2897 ExtTy->getPrimitiveSizeInBits();
2898 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2899 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2900 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2901 Value *Vec = CI->getArgOperand(0);
2902 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2903 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2904 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2905 } else if (Name.starts_with("avx.sqrt.p") ||
2906 Name.starts_with("sse2.sqrt.p") ||
2907 Name.starts_with("sse.sqrt.p")) {
2908 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2909 {CI->getArgOperand(0)});
2910 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2911 if (CI->arg_size() == 4 &&
2912 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2913 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2914 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2915 : Intrinsic::x86_avx512_sqrt_pd_512;
2916
2917 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2918 Rep = Builder.CreateIntrinsic(IID, Args);
2919 } else {
2920 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2921 {CI->getArgOperand(0)});
2922 }
2923 Rep =
2924 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2925 } else if (Name.starts_with("avx512.ptestm") ||
2926 Name.starts_with("avx512.ptestnm")) {
2927 Value *Op0 = CI->getArgOperand(0);
2928 Value *Op1 = CI->getArgOperand(1);
2929 Value *Mask = CI->getArgOperand(2);
2930 Rep = Builder.CreateAnd(Op0, Op1);
2931 llvm::Type *Ty = Op0->getType();
2932 Value *Zero = llvm::Constant::getNullValue(Ty);
2933 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2934 ? ICmpInst::ICMP_NE
2935 : ICmpInst::ICMP_EQ;
2936 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2937 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2938 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2939 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2940 ->getNumElements();
2941 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2942 Rep =
2943 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2944 } else if (Name.starts_with("avx512.kunpck")) {
2945 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2946 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2947 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2948 int Indices[64];
2949 for (unsigned i = 0; i != NumElts; ++i)
2950 Indices[i] = i;
2951
2952 // First extract half of each vector. This gives better codegen than
2953 // doing it in a single shuffle.
2954 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2955 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2956 // Concat the vectors.
2957 // NOTE: Operands have to be swapped to match intrinsic definition.
2958 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2959 Rep = Builder.CreateBitCast(Rep, CI->getType());
2960 } else if (Name == "avx512.kand.w") {
2961 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2962 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2963 Rep = Builder.CreateAnd(LHS, RHS);
2964 Rep = Builder.CreateBitCast(Rep, CI->getType());
2965 } else if (Name == "avx512.kandn.w") {
2966 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2967 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2968 LHS = Builder.CreateNot(LHS);
2969 Rep = Builder.CreateAnd(LHS, RHS);
2970 Rep = Builder.CreateBitCast(Rep, CI->getType());
2971 } else if (Name == "avx512.kor.w") {
2972 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2973 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2974 Rep = Builder.CreateOr(LHS, RHS);
2975 Rep = Builder.CreateBitCast(Rep, CI->getType());
2976 } else if (Name == "avx512.kxor.w") {
2977 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2978 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2979 Rep = Builder.CreateXor(LHS, RHS);
2980 Rep = Builder.CreateBitCast(Rep, CI->getType());
2981 } else if (Name == "avx512.kxnor.w") {
2982 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2983 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2984 LHS = Builder.CreateNot(LHS);
2985 Rep = Builder.CreateXor(LHS, RHS);
2986 Rep = Builder.CreateBitCast(Rep, CI->getType());
2987 } else if (Name == "avx512.knot.w") {
2988 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2989 Rep = Builder.CreateNot(Rep);
2990 Rep = Builder.CreateBitCast(Rep, CI->getType());
2991 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2992 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2993 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2994 Rep = Builder.CreateOr(LHS, RHS);
2995 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2996 Value *C;
2997 if (Name[14] == 'c')
2998 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2999 else
3000 C = ConstantInt::getNullValue(Builder.getInt16Ty());
3001 Rep = Builder.CreateICmpEQ(Rep, C);
3002 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3003 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3004 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3005 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3006 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3007 Type *I32Ty = Type::getInt32Ty(C);
3008 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
3009 ConstantInt::get(I32Ty, 0));
3010 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
3011 ConstantInt::get(I32Ty, 0));
3012 Value *EltOp;
3013 if (Name.contains(".add."))
3014 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3015 else if (Name.contains(".sub."))
3016 EltOp = Builder.CreateFSub(Elt0, Elt1);
3017 else if (Name.contains(".mul."))
3018 EltOp = Builder.CreateFMul(Elt0, Elt1);
3019 else
3020 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3021 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3022 ConstantInt::get(I32Ty, 0));
3023 } else if (Name.starts_with("avx512.mask.pcmp")) {
3024 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3025 bool CmpEq = Name[16] == 'e';
3026 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3027 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3028 Type *OpTy = CI->getArgOperand(0)->getType();
3029 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3030 Intrinsic::ID IID;
3031 switch (VecWidth) {
3032 default:
3033 llvm_unreachable("Unexpected intrinsic");
3034 case 128:
3035 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3036 break;
3037 case 256:
3038 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3039 break;
3040 case 512:
3041 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3042 break;
3043 }
3044
3045 Rep =
3046 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3047 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3048 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3049 Type *OpTy = CI->getArgOperand(0)->getType();
3050 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3051 unsigned EltWidth = OpTy->getScalarSizeInBits();
3052 Intrinsic::ID IID;
3053 if (VecWidth == 128 && EltWidth == 32)
3054 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3055 else if (VecWidth == 256 && EltWidth == 32)
3056 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3057 else if (VecWidth == 512 && EltWidth == 32)
3058 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3059 else if (VecWidth == 128 && EltWidth == 64)
3060 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3061 else if (VecWidth == 256 && EltWidth == 64)
3062 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3063 else if (VecWidth == 512 && EltWidth == 64)
3064 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3065 else
3066 llvm_unreachable("Unexpected intrinsic");
3067
3068 Rep =
3069 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3070 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3071 } else if (Name.starts_with("avx512.cmp.p")) {
3072 SmallVector<Value *, 4> Args(CI->args());
3073 Type *OpTy = Args[0]->getType();
3074 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3075 unsigned EltWidth = OpTy->getScalarSizeInBits();
3076 Intrinsic::ID IID;
3077 if (VecWidth == 128 && EltWidth == 32)
3078 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3079 else if (VecWidth == 256 && EltWidth == 32)
3080 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3081 else if (VecWidth == 512 && EltWidth == 32)
3082 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3083 else if (VecWidth == 128 && EltWidth == 64)
3084 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3085 else if (VecWidth == 256 && EltWidth == 64)
3086 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3087 else if (VecWidth == 512 && EltWidth == 64)
3088 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3089 else
3090 llvm_unreachable("Unexpected intrinsic");
3091
3092 Value *Mask = Constant::getAllOnesValue(Builder.getIntNTy(VecWidth / EltWidth));
3093 if (VecWidth == 512)
3094 std::swap(Mask, Args.back());
3095 Args.push_back(Mask);
3096
3097 Rep = Builder.CreateIntrinsic(IID, Args);
3098 } else if (Name.starts_with("avx512.mask.cmp.")) {
3099 // Integer compare intrinsics.
3100 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3101 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3102 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3103 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3104 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3105 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3106 Name.starts_with("avx512.cvtw2mask.") ||
3107 Name.starts_with("avx512.cvtd2mask.") ||
3108 Name.starts_with("avx512.cvtq2mask.")) {
3109 Value *Op = CI->getArgOperand(0);
3110 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3111 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3112 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3113 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3114 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3115 Name.starts_with("avx512.mask.pabs")) {
3116 Rep = upgradeAbs(Builder, *CI);
3117 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3118 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3119 Name.starts_with("avx512.mask.pmaxs")) {
3120 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3121 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3122 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3123 Name.starts_with("avx512.mask.pmaxu")) {
3124 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3125 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3126 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3127 Name.starts_with("avx512.mask.pmins")) {
3128 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3129 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3130 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3131 Name.starts_with("avx512.mask.pminu")) {
3132 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3133 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3134 Name == "avx512.pmulu.dq.512" ||
3135 Name.starts_with("avx512.mask.pmulu.dq.")) {
3136 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3137 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3138 Name == "avx512.pmul.dq.512" ||
3139 Name.starts_with("avx512.mask.pmul.dq.")) {
3140 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3141 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3142 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3143 Rep =
3144 Builder.CreateSIToFP(CI->getArgOperand(1),
3145 cast<VectorType>(CI->getType())->getElementType());
3146 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3147 } else if (Name == "avx512.cvtusi2sd") {
3148 Rep =
3149 Builder.CreateUIToFP(CI->getArgOperand(1),
3150 cast<VectorType>(CI->getType())->getElementType());
3151 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3152 } else if (Name == "sse2.cvtss2sd") {
3153 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3154 Rep = Builder.CreateFPExt(
3155 Rep, cast<VectorType>(CI->getType())->getElementType());
3156 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3157 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3158 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3159 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3160 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3161 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3162 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3163 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3164 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3165 Name == "avx512.mask.cvtqq2ps.256" ||
3166 Name == "avx512.mask.cvtqq2ps.512" ||
3167 Name == "avx512.mask.cvtuqq2ps.256" ||
3168 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3169 Name == "avx.cvt.ps2.pd.256" ||
3170 Name == "avx512.mask.cvtps2pd.128" ||
3171 Name == "avx512.mask.cvtps2pd.256") {
3172 auto *DstTy = cast<FixedVectorType>(CI->getType());
3173 Rep = CI->getArgOperand(0);
3174 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3175
3176 unsigned NumDstElts = DstTy->getNumElements();
3177 if (NumDstElts < SrcTy->getNumElements()) {
3178 assert(NumDstElts == 2 && "Unexpected vector size");
3179 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3180 }
3181
3182 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3183 bool IsUnsigned = Name.contains("cvtu");
3184 if (IsPS2PD)
3185 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3186 else if (CI->arg_size() == 4 &&
3187 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3188 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3189 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3190 : Intrinsic::x86_avx512_sitofp_round;
3191 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3192 {Rep, CI->getArgOperand(3)});
3193 } else {
3194 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3195 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3196 }
3197
3198 if (CI->arg_size() >= 3)
3199 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3200 CI->getArgOperand(1));
3201 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3202 Name.starts_with("vcvtph2ps.")) {
3203 auto *DstTy = cast<FixedVectorType>(CI->getType());
3204 Rep = CI->getArgOperand(0);
3205 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3206 unsigned NumDstElts = DstTy->getNumElements();
3207 if (NumDstElts != SrcTy->getNumElements()) {
3208 assert(NumDstElts == 4 && "Unexpected vector size");
3209 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3210 }
3211 Rep = Builder.CreateBitCast(
3212 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3213 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3214 if (CI->arg_size() >= 3)
3215 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3216 CI->getArgOperand(1));
3217 } else if (Name.starts_with("avx512.mask.load")) {
3218 // "avx512.mask.loadu." or "avx512.mask.load."
3219 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3220 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3221 CI->getArgOperand(2), Aligned);
3222 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3223 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3224 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3225 ResultTy->getNumElements());
3226
3227 Rep = Builder.CreateIntrinsic(
3228 Intrinsic::masked_expandload, ResultTy,
3229 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3230 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3231 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3232 Value *MaskVec =
3233 getX86MaskVec(Builder, CI->getArgOperand(2),
3234 cast<FixedVectorType>(ResultTy)->getNumElements());
3235
3236 Rep = Builder.CreateIntrinsic(
3237 Intrinsic::masked_compressstore, ResultTy,
3238 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3239 } else if (Name.starts_with("avx512.mask.compress.") ||
3240 Name.starts_with("avx512.mask.expand.")) {
3241 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3242
3243 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3244 ResultTy->getNumElements());
3245
3246 bool IsCompress = Name[12] == 'c';
3247 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3248 : Intrinsic::x86_avx512_mask_expand;
3249 Rep = Builder.CreateIntrinsic(
3250 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3251 } else if (Name.starts_with("xop.vpcom")) {
3252 bool IsSigned;
3253 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3254 Name.ends_with("uq"))
3255 IsSigned = false;
3256 else if (Name.ends_with("b") || Name.ends_with("w") ||
3257 Name.ends_with("d") || Name.ends_with("q"))
3258 IsSigned = true;
3259 else
3260 llvm_unreachable("Unknown suffix");
3261
3262 unsigned Imm;
3263 if (CI->arg_size() == 3) {
3264 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3265 } else {
3266 Name = Name.substr(9); // strip off "xop.vpcom"
3267 if (Name.starts_with("lt"))
3268 Imm = 0;
3269 else if (Name.starts_with("le"))
3270 Imm = 1;
3271 else if (Name.starts_with("gt"))
3272 Imm = 2;
3273 else if (Name.starts_with("ge"))
3274 Imm = 3;
3275 else if (Name.starts_with("eq"))
3276 Imm = 4;
3277 else if (Name.starts_with("ne"))
3278 Imm = 5;
3279 else if (Name.starts_with("false"))
3280 Imm = 6;
3281 else if (Name.starts_with("true"))
3282 Imm = 7;
3283 else
3284 llvm_unreachable("Unknown condition");
3285 }
3286
3287 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3288 } else if (Name.starts_with("xop.vpcmov")) {
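// vpcmov is a bitwise select: (Op0 & Sel) | (Op1 & ~Sel), built from the
// primitive logic ops below.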
3289 Value *Sel = CI->getArgOperand(2);
3290 Value *NotSel = Builder.CreateNot(Sel);
3291 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3292 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3293 Rep = Builder.CreateOr(Sel0, Sel1);
3294 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3295 Name.starts_with("avx512.mask.prol")) {
3296 Rep = upgradeX86Rotate(Builder, *CI, false);
3297 } else if (Name.starts_with("avx512.pror") ||
3298 Name.starts_with("avx512.mask.pror")) {
3299 Rep = upgradeX86Rotate(Builder, *CI, true);
3300 } else if (Name.starts_with("avx512.vpshld.") ||
3301 Name.starts_with("avx512.mask.vpshld") ||
3302 Name.starts_with("avx512.maskz.vpshld")) {
3303 bool ZeroMask = Name[11] == 'z';
3304 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3305 } else if (Name.starts_with("avx512.vpshrd.") ||
3306 Name.starts_with("avx512.mask.vpshrd") ||
3307 Name.starts_with("avx512.maskz.vpshrd")) {
3308 bool ZeroMask = Name[11] == 'z';
3309 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3310 } else if (Name == "sse42.crc32.64.8") {
3311 Value *Trunc0 =
3312 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3313 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3314 {Trunc0, CI->getArgOperand(1)});
3315 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3316 } else if (Name.starts_with("avx.vbroadcast.s") ||
3317 Name.starts_with("avx512.vbroadcast.s")) {
3318 // Replace broadcasts with a series of insertelements.
3319 auto *VecTy = cast<FixedVectorType>(CI->getType());
3320 Type *EltTy = VecTy->getElementType();
3321 unsigned EltNum = VecTy->getNumElements();
3322 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3323 Type *I32Ty = Type::getInt32Ty(C);
3324 Rep = PoisonValue::get(VecTy);
3325 for (unsigned I = 0; I < EltNum; ++I)
3326 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3327 } else if (Name.starts_with("sse41.pmovsx") ||
3328 Name.starts_with("sse41.pmovzx") ||
3329 Name.starts_with("avx2.pmovsx") ||
3330 Name.starts_with("avx2.pmovzx") ||
3331 Name.starts_with("avx512.mask.pmovsx") ||
3332 Name.starts_with("avx512.mask.pmovzx")) {
3333 auto *DstTy = cast<FixedVectorType>(CI->getType());
3334 unsigned NumDstElts = DstTy->getNumElements();
3335
3336 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
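// E.g. sse41.pmovsxbw reads the low 8 lanes of a <16 x i8> source and
// sign-extends them to <8 x i16>.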
3337 SmallVector<int, 8> ShuffleMask(NumDstElts);
3338 for (unsigned i = 0; i != NumDstElts; ++i)
3339 ShuffleMask[i] = i;
3340
3341 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3342
3343 bool DoSext = Name.contains("pmovsx");
3344 Rep =
3345 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3346 // If there are 3 arguments, it's a masked intrinsic, so we need a select.
3347 if (CI->arg_size() == 3)
3348 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3349 CI->getArgOperand(1));
3350 } else if (Name == "avx512.mask.pmov.qd.256" ||
3351 Name == "avx512.mask.pmov.qd.512" ||
3352 Name == "avx512.mask.pmov.wb.256" ||
3353 Name == "avx512.mask.pmov.wb.512") {
3354 Type *Ty = CI->getArgOperand(1)->getType();
3355 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3356 Rep =
3357 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3358 } else if (Name.starts_with("avx.vbroadcastf128") ||
3359 Name == "avx2.vbroadcasti128") {
3360 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3361 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3362 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3363 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3364 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3365 if (NumSrcElts == 2)
3366 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3367 else
3368 Rep = Builder.CreateShuffleVector(Load,
3369 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3370 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3371 Name.starts_with("avx512.mask.shuf.f")) {
3372 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3373 Type *VT = CI->getType();
3374 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3375 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3376 unsigned ControlBitsMask = NumLanes - 1;
3377 unsigned NumControlBits = NumLanes / 2;
3378 SmallVector<int, 8> ShuffleMask(0);
3379
3380 for (unsigned l = 0; l != NumLanes; ++l) {
3381 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3382 // Lanes in the second half of the result come from the second source.
3383 if (l >= NumLanes / 2)
3384 LaneMask += NumLanes;
3385 for (unsigned i = 0; i != NumElementsInLane; ++i)
3386 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3387 }
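// Illustrative example: shuf.i64x2 on 512-bit vectors (4 lanes of 2
// elements) with Imm = 0xE4 takes lanes {0, 1} from the first source and
// lanes {2, 3} from the second, i.e. element indices
// {0, 1, 2, 3, 12, 13, 14, 15}.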
3388 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3389 CI->getArgOperand(1), ShuffleMask);
3390 Rep =
3391 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3392 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3393 Name.starts_with("avx512.mask.broadcasti")) {
3394 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3395 ->getNumElements();
3396 unsigned NumDstElts =
3397 cast<FixedVectorType>(CI->getType())->getNumElements();
3398
3399 SmallVector<int, 8> ShuffleMask(NumDstElts);
3400 for (unsigned i = 0; i != NumDstElts; ++i)
3401 ShuffleMask[i] = i % NumSrcElts;
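// E.g. broadcasting a 4-element source into a 16-element destination
// produces the mask {0, 1, 2, 3, 0, 1, 2, 3, ...}.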
3402
3403 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3404 CI->getArgOperand(0), ShuffleMask);
3405 Rep =
3406 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3407 } else if (Name.starts_with("avx2.pbroadcast") ||
3408 Name.starts_with("avx2.vbroadcast") ||
3409 Name.starts_with("avx512.pbroadcast") ||
3410 Name.starts_with("avx512.mask.broadcast.s")) {
3411 // Replace vp?broadcasts with a vector shuffle.
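// A zero shuffle mask replicates element 0 across all result lanes.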
3412 Value *Op = CI->getArgOperand(0);
3413 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3414 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3415 SmallVector<int, 8> M;
3416 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3417 Rep = Builder.CreateShuffleVector(Op, M);
3418
3419 if (CI->arg_size() == 3)
3420 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3421 CI->getArgOperand(1));
3422 } else if (Name.starts_with("sse2.padds.") ||
3423 Name.starts_with("avx2.padds.") ||
3424 Name.starts_with("avx512.padds.") ||
3425 Name.starts_with("avx512.mask.padds.")) {
3426 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3427 } else if (Name.starts_with("sse2.psubs.") ||
3428 Name.starts_with("avx2.psubs.") ||
3429 Name.starts_with("avx512.psubs.") ||
3430 Name.starts_with("avx512.mask.psubs.")) {
3431 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3432 } else if (Name.starts_with("sse2.paddus.") ||
3433 Name.starts_with("avx2.paddus.") ||
3434 Name.starts_with("avx512.mask.paddus.")) {
3435 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3436 } else if (Name.starts_with("sse2.psubus.") ||
3437 Name.starts_with("avx2.psubus.") ||
3438 Name.starts_with("avx512.mask.psubus.")) {
3439 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3440 } else if (Name.starts_with("avx512.mask.palignr.")) {
3441 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3442 CI->getArgOperand(1), CI->getArgOperand(2),
3443 CI->getArgOperand(3), CI->getArgOperand(4),
3444 false);
3445 } else if (Name.starts_with("avx512.mask.valign.")) {
3446 Rep = upgradeX86ALIGNIntrinsics(
3447 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3448 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3449 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3450 // 128/256-bit shift left specified in bits.
3451 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3452 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3453 Shift / 8); // Shift is in bits.
3454 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3455 // 128/256-bit shift right specified in bits.
3456 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3457 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3458 Shift / 8); // Shift is in bits.
3459 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3460 Name == "avx512.psll.dq.512") {
3461 // 128/256/512-bit shift left specified in bytes.
3462 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3463 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3464 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3465 Name == "avx512.psrl.dq.512") {
3466 // 128/256/512-bit shift right specified in bytes.
3467 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3468 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3469 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3470 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3471 Name.starts_with("avx2.pblendd.")) {
3472 Value *Op0 = CI->getArgOperand(0);
3473 Value *Op1 = CI->getArgOperand(1);
3474 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3475 auto *VecTy = cast<FixedVectorType>(CI->getType());
3476 unsigned NumElts = VecTy->getNumElements();
3477
3478 SmallVector<int, 16> Idxs(NumElts);
3479 for (unsigned i = 0; i != NumElts; ++i)
3480 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
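// E.g. blending <4 x float> with Imm = 0b0101 gives the mask {4, 1, 6, 3};
// indices >= NumElts select from Op1.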
3481
3482 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3483 } else if (Name.starts_with("avx.vinsertf128.") ||
3484 Name == "avx2.vinserti128" ||
3485 Name.starts_with("avx512.mask.insert")) {
3486 Value *Op0 = CI->getArgOperand(0);
3487 Value *Op1 = CI->getArgOperand(1);
3488 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3489 unsigned DstNumElts =
3490 cast<FixedVectorType>(CI->getType())->getNumElements();
3491 unsigned SrcNumElts =
3492 cast<FixedVectorType>(Op1->getType())->getNumElements();
3493 unsigned Scale = DstNumElts / SrcNumElts;
3494
3495 // Mask off the high bits of the immediate value; hardware ignores those.
3496 Imm = Imm % Scale;
3497
3498 // Extend the second operand into a vector the size of the destination.
3499 SmallVector<int, 8> Idxs(DstNumElts);
3500 for (unsigned i = 0; i != SrcNumElts; ++i)
3501 Idxs[i] = i;
3502 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3503 Idxs[i] = SrcNumElts;
3504 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3505
3506 // Insert the second operand into the first operand.
3507
3508 // Note that there is no guarantee that instruction lowering will actually
3509 // produce a vinsertf128 instruction for the created shuffles. In
3510 // particular, the 0 immediate case involves no lane changes, so it can
3511 // be handled as a blend.
3512
3513 // Example of shuffle mask for 32-bit elements:
3514 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3515 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
3516
3517 // First fill with the identity mask.
3518 for (unsigned i = 0; i != DstNumElts; ++i)
3519 Idxs[i] = i;
3520 // Then replace the elements where we need to insert.
3521 for (unsigned i = 0; i != SrcNumElts; ++i)
3522 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3523 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3524
3525 // If the intrinsic has a mask operand, handle that.
3526 if (CI->arg_size() == 5)
3527 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3528 CI->getArgOperand(3));
3529 } else if (Name.starts_with("avx.vextractf128.") ||
3530 Name == "avx2.vextracti128" ||
3531 Name.starts_with("avx512.mask.vextract")) {
3532 Value *Op0 = CI->getArgOperand(0);
3533 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3534 unsigned DstNumElts =
3535 cast<FixedVectorType>(CI->getType())->getNumElements();
3536 unsigned SrcNumElts =
3537 cast<FixedVectorType>(Op0->getType())->getNumElements();
3538 unsigned Scale = SrcNumElts / DstNumElts;
3539
3540 // Mask off the high bits of the immediate value; hardware ignores those.
3541 Imm = Imm % Scale;
3542
3543 // Get indexes for the subvector of the input vector.
3544 SmallVector<int, 8> Idxs(DstNumElts);
3545 for (unsigned i = 0; i != DstNumElts; ++i) {
3546 Idxs[i] = i + (Imm * DstNumElts);
3547 }
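// E.g. extracting the upper half (Imm = 1) of an <8 x float> source into a
// <4 x float> result uses indices {4, 5, 6, 7}.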
3548 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3549
3550 // If the intrinsic has a mask operand, handle that.
3551 if (CI->arg_size() == 4)
3552 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3553 CI->getArgOperand(2));
3554 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3555 Name.starts_with("avx512.mask.perm.di.")) {
3556 Value *Op0 = CI->getArgOperand(0);
3557 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3558 auto *VecTy = cast<FixedVectorType>(CI->getType());
3559 unsigned NumElts = VecTy->getNumElements();
3560
3561 SmallVector<int, 8> Idxs(NumElts);
3562 for (unsigned i = 0; i != NumElts; ++i)
3563 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
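// E.g. Imm = 0x1B (0b00011011) reverses each group of four elements:
// {3, 2, 1, 0, 7, 6, 5, 4} for an 8-element vector.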
3564
3565 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3566
3567 if (CI->arg_size() == 4)
3568 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3569 CI->getArgOperand(2));
3570 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3571 // The immediate permute control byte looks like this:
3572 // [1:0] - select 128 bits from sources for low half of destination
3573 // [2] - ignore
3574 // [3] - zero low half of destination
3575 // [5:4] - select 128 bits from sources for high half of destination
3576 // [6] - ignore
3577 // [7] - zero high half of destination
3578
3579 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3580
3581 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3582 unsigned HalfSize = NumElts / 2;
3583 SmallVector<int, 8> ShuffleMask(NumElts);
3584
3585 // Determine which operand(s) are actually in use for this instruction.
3586 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3587 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3588
3589 // If needed, replace operands based on zero mask.
3590 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3591 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3592
3593 // Permute low half of result.
3594 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3595 for (unsigned i = 0; i < HalfSize; ++i)
3596 ShuffleMask[i] = StartIndex + i;
3597
3598 // Permute high half of result.
3599 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3600 for (unsigned i = 0; i < HalfSize; ++i)
3601 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
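// E.g. Imm = 0x21 on <8 x float> takes the high half of the first source
// and the low half of the second: mask {4, 5, 6, 7, 8, 9, 10, 11}.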
3602
3603 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3604
3605 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3606 Name.starts_with("avx512.mask.vpermil.p") ||
3607 Name.starts_with("avx512.mask.pshuf.d.")) {
3608 Value *Op0 = CI->getArgOperand(0);
3609 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3610 auto *VecTy = cast<FixedVectorType>(CI->getType());
3611 unsigned NumElts = VecTy->getNumElements();
3612 // Calculate the size of each index in the immediate.
3613 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3614 unsigned IdxMask = ((1 << IdxSize) - 1);
3615
3616 SmallVector<int, 8> Idxs(NumElts);
3617 // Look up the bits for this element, wrapping around the immediate every
3618 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3619 // to offset by the first index of each group.
3620 for (unsigned i = 0; i != NumElts; ++i)
3621 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
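// E.g. pshufd on <4 x i32> with Imm = 0x1B produces {3, 2, 1, 0}.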
3622
3623 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3624
3625 if (CI->arg_size() == 4)
3626 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3627 CI->getArgOperand(2));
3628 } else if (Name == "sse2.pshufl.w" ||
3629 Name.starts_with("avx512.mask.pshufl.w.")) {
3630 Value *Op0 = CI->getArgOperand(0);
3631 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3632 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3633
3634 SmallVector<int, 16> Idxs(NumElts);
3635 for (unsigned l = 0; l != NumElts; l += 8) {
3636 for (unsigned i = 0; i != 4; ++i)
3637 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3638 for (unsigned i = 4; i != 8; ++i)
3639 Idxs[i + l] = i + l;
3640 }
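// E.g. for <8 x i16> with Imm = 0x1B the mask is {3, 2, 1, 0, 4, 5, 6, 7}:
// only the low four words of each lane are permuted.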
3641
3642 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3643
3644 if (CI->arg_size() == 4)
3645 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3646 CI->getArgOperand(2));
3647 } else if (Name == "sse2.pshufh.w" ||
3648 Name.starts_with("avx512.mask.pshufh.w.")) {
3649 Value *Op0 = CI->getArgOperand(0);
3650 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3651 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3652
3653 SmallVector<int, 16> Idxs(NumElts);
3654 for (unsigned l = 0; l != NumElts; l += 8) {
3655 for (unsigned i = 0; i != 4; ++i)
3656 Idxs[i + l] = i + l;
3657 for (unsigned i = 0; i != 4; ++i)
3658 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3659 }
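// E.g. for <8 x i16> with Imm = 0x1B the mask is {0, 1, 2, 3, 7, 6, 5, 4}:
// only the high four words of each lane are permuted.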
3660
3661 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3662
3663 if (CI->arg_size() == 4)
3664 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3665 CI->getArgOperand(2));
3666 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3667 Value *Op0 = CI->getArgOperand(0);
3668 Value *Op1 = CI->getArgOperand(1);
3669 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3670 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3671
3672 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3673 unsigned HalfLaneElts = NumLaneElts / 2;
3674
3675 SmallVector<int, 16> Idxs(NumElts);
3676 for (unsigned i = 0; i != NumElts; ++i) {
3677 // Base index is the starting element of the lane.
3678 Idxs[i] = i - (i % NumLaneElts);
3680 // If we are halfway through the lane, switch to the other source.
3680 if ((i % NumLaneElts) >= HalfLaneElts)
3681 Idxs[i] += NumElts;
3683 // Now select the specific element by adding HalfLaneElts bits from
3684 // the immediate, wrapping around the immediate every 8 bits.
3684 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3685 }
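// E.g. shufps on <4 x float> with Imm = 0x44 (0b01000100) produces
// {0, 1, 4, 5}: two elements from Op0 followed by two from Op1.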
3686
3687 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3688
3689 Rep =
3690 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3691 } else if (Name.starts_with("avx512.mask.movddup") ||
3692 Name.starts_with("avx512.mask.movshdup") ||
3693 Name.starts_with("avx512.mask.movsldup")) {
3694 Value *Op0 = CI->getArgOperand(0);
3695 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3696 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3697
3698 unsigned Offset = 0;
3699 if (Name.starts_with("avx512.mask.movshdup."))
3700 Offset = 1;
3701
3702 SmallVector<int, 16> Idxs(NumElts);
3703 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3704 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3705 Idxs[i + l + 0] = i + l + Offset;
3706 Idxs[i + l + 1] = i + l + Offset;
3707 }
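// E.g. on <4 x float> this yields {0, 0, 2, 2} for movsldup/movddup
// (Offset 0) and {1, 1, 3, 3} for movshdup (Offset 1).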
3708
3709 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3710
3711 Rep =
3712 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3713 } else if (Name.starts_with("avx512.mask.punpckl") ||
3714 Name.starts_with("avx512.mask.unpckl.")) {
3715 Value *Op0 = CI->getArgOperand(0);
3716 Value *Op1 = CI->getArgOperand(1);
3717 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3718 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3719
3720 SmallVector<int, 64> Idxs(NumElts);
3721 for (int l = 0; l != NumElts; l += NumLaneElts)
3722 for (int i = 0; i != NumLaneElts; ++i)
3723 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
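// E.g. for <4 x i32> this is the classic unpacklo pattern {0, 4, 1, 5}.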
3724
3725 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3726
3727 Rep =
3728 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3729 } else if (Name.starts_with("avx512.mask.punpckh") ||
3730 Name.starts_with("avx512.mask.unpckh.")) {
3731 Value *Op0 = CI->getArgOperand(0);
3732 Value *Op1 = CI->getArgOperand(1);
3733 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3734 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3735
3736 SmallVector<int, 64> Idxs(NumElts);
3737 for (int l = 0; l != NumElts; l += NumLaneElts)
3738 for (int i = 0; i != NumLaneElts; ++i)
3739 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
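// E.g. for <4 x i32> this is the classic unpackhi pattern {2, 6, 3, 7}.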
3740
3741 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3742
3743 Rep =
3744 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3745 } else if (Name.starts_with("avx512.mask.and.") ||
3746 Name.starts_with("avx512.mask.pand.")) {
3747 VectorType *FTy = cast<VectorType>(CI->getType());
3748 VectorType *ITy = VectorType::getInteger(FTy);
3749 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3750 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3751 Rep = Builder.CreateBitCast(Rep, FTy);
3752 Rep =
3753 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3754 } else if (Name.starts_with("avx512.mask.andn.") ||
3755 Name.starts_with("avx512.mask.pandn.")) {
3756 VectorType *FTy = cast<VectorType>(CI->getType());
3757 VectorType *ITy = VectorType::getInteger(FTy);
3758 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3759 Rep = Builder.CreateAnd(Rep,
3760 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3761 Rep = Builder.CreateBitCast(Rep, FTy);
3762 Rep =
3763 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3764 } else if (Name.starts_with("avx512.mask.or.") ||
3765 Name.starts_with("avx512.mask.por.")) {
3766 VectorType *FTy = cast<VectorType>(CI->getType());
3767 VectorType *ITy = VectorType::getInteger(FTy);
3768 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3769 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3770 Rep = Builder.CreateBitCast(Rep, FTy);
3771 Rep =
3772 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3773 } else if (Name.starts_with("avx512.mask.xor.") ||
3774 Name.starts_with("avx512.mask.pxor.")) {
3775 VectorType *FTy = cast<VectorType>(CI->getType());
3776 VectorType *ITy = VectorType::getInteger(FTy);
3777 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3778 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3779 Rep = Builder.CreateBitCast(Rep, FTy);
3780 Rep =
3781 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3782 } else if (Name.starts_with("avx512.mask.padd.")) {
3783 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3784 Rep =
3785 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3786 } else if (Name.starts_with("avx512.mask.psub.")) {
3787 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3788 Rep =
3789 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3790 } else if (Name.starts_with("avx512.mask.pmull.")) {
3791 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3792 Rep =
3793 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3794 } else if (Name.starts_with("avx512.mask.add.p")) {
3795 if (Name.ends_with(".512")) {
3796 Intrinsic::ID IID;
3797 if (Name[17] == 's')
3798 IID = Intrinsic::x86_avx512_add_ps_512;
3799 else
3800 IID = Intrinsic::x86_avx512_add_pd_512;
3801
3802 Rep = Builder.CreateIntrinsic(
3803 IID,
3804 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3805 } else {
3806 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3807 }
3808 Rep =
3809 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3810 } else if (Name.starts_with("avx512.mask.div.p")) {
3811 if (Name.ends_with(".512")) {
3812 Intrinsic::ID IID;
3813 if (Name[17] == 's')
3814 IID = Intrinsic::x86_avx512_div_ps_512;
3815 else
3816 IID = Intrinsic::x86_avx512_div_pd_512;
3817
3818 Rep = Builder.CreateIntrinsic(
3819 IID,
3820 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3821 } else {
3822 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3823 }
3824 Rep =
3825 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3826 } else if (Name.starts_with("avx512.mask.mul.p")) {
3827 if (Name.ends_with(".512")) {
3828 Intrinsic::ID IID;
3829 if (Name[17] == 's')
3830 IID = Intrinsic::x86_avx512_mul_ps_512;
3831 else
3832 IID = Intrinsic::x86_avx512_mul_pd_512;
3833
3834 Rep = Builder.CreateIntrinsic(
3835 IID,
3836 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3837 } else {
3838 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3839 }
3840 Rep =
3841 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3842 } else if (Name.starts_with("avx512.mask.sub.p")) {
3843 if (Name.ends_with(".512")) {
3844 Intrinsic::ID IID;
3845 if (Name[17] == 's')
3846 IID = Intrinsic::x86_avx512_sub_ps_512;
3847 else
3848 IID = Intrinsic::x86_avx512_sub_pd_512;
3849
3850 Rep = Builder.CreateIntrinsic(
3851 IID,
3852 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3853 } else {
3854 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3855 }
3856 Rep =
3857 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3858 } else if ((Name.starts_with("avx512.mask.max.p") ||
3859 Name.starts_with("avx512.mask.min.p")) &&
3860 Name.drop_front(18) == ".512") {
3861 bool IsDouble = Name[17] == 'd';
3862 bool IsMin = Name[13] == 'i';
3863 static const Intrinsic::ID MinMaxTbl[2][2] = {
3864 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3865 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3866 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3867
3868 Rep = Builder.CreateIntrinsic(
3869 IID,
3870 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3871 Rep =
3872 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3873 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3874 Rep =
3875 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3876 {CI->getArgOperand(0), Builder.getInt1(false)});
3877 Rep =
3878 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3879 } else if (Name.starts_with("avx512.mask.psll")) {
3880 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3881 bool IsVariable = Name[16] == 'v';
3882 char Size = Name[16] == '.' ? Name[17]
3883 : Name[17] == '.' ? Name[18]
3884 : Name[18] == '.' ? Name[19]
3885 : Name[20];
3886
3887 Intrinsic::ID IID;
3888 if (IsVariable && Name[17] != '.') {
3889 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3890 IID = Intrinsic::x86_avx2_psllv_q;
3891 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3892 IID = Intrinsic::x86_avx2_psllv_q_256;
3893 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3894 IID = Intrinsic::x86_avx2_psllv_d;
3895 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3896 IID = Intrinsic::x86_avx2_psllv_d_256;
3897 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3898 IID = Intrinsic::x86_avx512_psllv_w_128;
3899 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3900 IID = Intrinsic::x86_avx512_psllv_w_256;
3901 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3902 IID = Intrinsic::x86_avx512_psllv_w_512;
3903 else
3904 llvm_unreachable("Unexpected size");
3905 } else if (Name.ends_with(".128")) {
3906 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3907 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3908 : Intrinsic::x86_sse2_psll_d;
3909 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3910 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3911 : Intrinsic::x86_sse2_psll_q;
3912 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3913 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3914 : Intrinsic::x86_sse2_psll_w;
3915 else
3916 llvm_unreachable("Unexpected size");
3917 } else if (Name.ends_with(".256")) {
3918 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3919 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3920 : Intrinsic::x86_avx2_psll_d;
3921 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3922 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3923 : Intrinsic::x86_avx2_psll_q;
3924 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3925 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3926 : Intrinsic::x86_avx2_psll_w;
3927 else
3928 llvm_unreachable("Unexpected size");
3929 } else {
3930 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3931 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3932 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3933 : Intrinsic::x86_avx512_psll_d_512;
3934 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3935 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3936 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3937 : Intrinsic::x86_avx512_psll_q_512;
3938 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3939 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3940 : Intrinsic::x86_avx512_psll_w_512;
3941 else
3942 llvm_unreachable("Unexpected size");
3943 }
3944
3945 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3946 } else if (Name.starts_with("avx512.mask.psrl")) {
3947 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3948 bool IsVariable = Name[16] == 'v';
3949 char Size = Name[16] == '.' ? Name[17]
3950 : Name[17] == '.' ? Name[18]
3951 : Name[18] == '.' ? Name[19]
3952 : Name[20];
3953
3954 Intrinsic::ID IID;
3955 if (IsVariable && Name[17] != '.') {
3956 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3957 IID = Intrinsic::x86_avx2_psrlv_q;
3958 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3959 IID = Intrinsic::x86_avx2_psrlv_q_256;
3960 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3961 IID = Intrinsic::x86_avx2_psrlv_d;
3962 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3963 IID = Intrinsic::x86_avx2_psrlv_d_256;
3964 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3965 IID = Intrinsic::x86_avx512_psrlv_w_128;
3966 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3967 IID = Intrinsic::x86_avx512_psrlv_w_256;
3968 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3969 IID = Intrinsic::x86_avx512_psrlv_w_512;
3970 else
3971 llvm_unreachable("Unexpected size");
3972 } else if (Name.ends_with(".128")) {
3973 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3974 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3975 : Intrinsic::x86_sse2_psrl_d;
3976 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3977 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3978 : Intrinsic::x86_sse2_psrl_q;
3979 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3980 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3981 : Intrinsic::x86_sse2_psrl_w;
3982 else
3983 llvm_unreachable("Unexpected size");
3984 } else if (Name.ends_with(".256")) {
3985 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3986 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3987 : Intrinsic::x86_avx2_psrl_d;
3988 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3989 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3990 : Intrinsic::x86_avx2_psrl_q;
3991 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3992 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3993 : Intrinsic::x86_avx2_psrl_w;
3994 else
3995 llvm_unreachable("Unexpected size");
3996 } else {
3997 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3998 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3999 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4000 : Intrinsic::x86_avx512_psrl_d_512;
4001 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4002 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4003 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4004 : Intrinsic::x86_avx512_psrl_q_512;
4005 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
4006 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4007 : Intrinsic::x86_avx512_psrl_w_512;
4008 else
4009 llvm_unreachable("Unexpected size");
4010 }
4011
4012 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4013 } else if (Name.starts_with("avx512.mask.psra")) {
4014 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4015 bool IsVariable = Name[16] == 'v';
4016 char Size = Name[16] == '.' ? Name[17]
4017 : Name[17] == '.' ? Name[18]
4018 : Name[18] == '.' ? Name[19]
4019 : Name[20];
4020
4021 Intrinsic::ID IID;
4022 if (IsVariable && Name[17] != '.') {
4023 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4024 IID = Intrinsic::x86_avx2_psrav_d;
4025 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4026 IID = Intrinsic::x86_avx2_psrav_d_256;
4027 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4028 IID = Intrinsic::x86_avx512_psrav_w_128;
4029 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4030 IID = Intrinsic::x86_avx512_psrav_w_256;
4031 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4032 IID = Intrinsic::x86_avx512_psrav_w_512;
4033 else
4034 llvm_unreachable("Unexpected size");
4035 } else if (Name.ends_with(".128")) {
4036 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4037 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4038 : Intrinsic::x86_sse2_psra_d;
4039 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4040 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4041 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4042 : Intrinsic::x86_avx512_psra_q_128;
4043 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4044 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4045 : Intrinsic::x86_sse2_psra_w;
4046 else
4047 llvm_unreachable("Unexpected size");
4048 } else if (Name.ends_with(".256")) {
4049 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4050 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4051 : Intrinsic::x86_avx2_psra_d;
4052 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4053 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4054 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4055 : Intrinsic::x86_avx512_psra_q_256;
4056 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4057 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4058 : Intrinsic::x86_avx2_psra_w;
4059 else
4060 llvm_unreachable("Unexpected size");
4061 } else {
4062 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4063 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4064 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4065 : Intrinsic::x86_avx512_psra_d_512;
4066 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4067 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4068 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4069 : Intrinsic::x86_avx512_psra_q_512;
4070 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4071 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4072 : Intrinsic::x86_avx512_psra_w_512;
4073 else
4074 llvm_unreachable("Unexpected size");
4075 }
4076
4077 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4078 } else if (Name.starts_with("avx512.mask.move.s")) {
4079 Rep = upgradeMaskedMove(Builder, *CI);
4080 } else if (Name.starts_with("avx512.cvtmask2")) {
4081 Rep = upgradeMaskToInt(Builder, *CI);
4082 } else if (Name.ends_with(".movntdqa")) {
4083 MDNode *Node = MDNode::get(
4084 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4085
4086 LoadInst *LI = Builder.CreateAlignedLoad(
4087 CI->getType(), CI->getArgOperand(0),
4088 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
4089 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4090 Rep = LI;
4091 } else if (Name.starts_with("fma.vfmadd.") ||
4092 Name.starts_with("fma.vfmsub.") ||
4093 Name.starts_with("fma.vfnmadd.") ||
4094 Name.starts_with("fma.vfnmsub.")) {
4095 bool NegMul = Name[6] == 'n';
4096 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4097 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4098
4099 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4100 CI->getArgOperand(2)};
4101
4102 if (IsScalar) {
4103 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4104 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4105 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4106 }
4107
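// vfmsub negates the addend, vfnmadd negates the product, and vfnmsub
// negates both; the negations are folded into the operands below.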
4108 if (NegMul && !IsScalar)
4109 Ops[0] = Builder.CreateFNeg(Ops[0]);
4110 if (NegMul && IsScalar)
4111 Ops[1] = Builder.CreateFNeg(Ops[1]);
4112 if (NegAcc)
4113 Ops[2] = Builder.CreateFNeg(Ops[2]);
4114
4115 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4116
4117 if (IsScalar)
4118 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4119 } else if (Name.starts_with("fma4.vfmadd.s")) {
4120 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4121 CI->getArgOperand(2)};
4122
4123 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4124 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4125 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4126
4127 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4128
4129 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4130 Rep, (uint64_t)0);
4131 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4132 Name.starts_with("avx512.maskz.vfmadd.s") ||
4133 Name.starts_with("avx512.mask3.vfmadd.s") ||
4134 Name.starts_with("avx512.mask3.vfmsub.s") ||
4135 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4136 bool IsMask3 = Name[11] == '3';
4137 bool IsMaskZ = Name[11] == 'z';
4138 // Drop the "avx512.mask." prefix to make the indexing below easier.
4139 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4140 bool NegMul = Name[2] == 'n';
4141 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4142
4143 Value *A = CI->getArgOperand(0);
4144 Value *B = CI->getArgOperand(1);
4145 Value *C = CI->getArgOperand(2);
4146
4147 if (NegMul && (IsMask3 || IsMaskZ))
4148 A = Builder.CreateFNeg(A);
4149 if (NegMul && !(IsMask3 || IsMaskZ))
4150 B = Builder.CreateFNeg(B);
4151 if (NegAcc)
4152 C = Builder.CreateFNeg(C);
4153
4154 A = Builder.CreateExtractElement(A, (uint64_t)0);
4155 B = Builder.CreateExtractElement(B, (uint64_t)0);
4156 C = Builder.CreateExtractElement(C, (uint64_t)0);
4157
4158 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4159 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4160 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4161
4162 Intrinsic::ID IID;
4163 if (Name.back() == 'd')
4164 IID = Intrinsic::x86_avx512_vfmadd_f64;
4165 else
4166 IID = Intrinsic::x86_avx512_vfmadd_f32;
4167 Rep = Builder.CreateIntrinsic(IID, Ops);
4168 } else {
4169 Rep = Builder.CreateFMA(A, B, C);
4170 }
4171
4172 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4173 : IsMask3 ? C
4174 : A;
4175
4176 // For Mask3 with NegAcc, we need to create a new extractelement that
4177 // avoids the negation above.
4178 if (NegAcc && IsMask3)
4179 PassThru =
4180 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4181
4182 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4183 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4184 (uint64_t)0);
4185 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4186 Name.starts_with("avx512.mask.vfnmadd.p") ||
4187 Name.starts_with("avx512.mask.vfnmsub.p") ||
4188 Name.starts_with("avx512.mask3.vfmadd.p") ||
4189 Name.starts_with("avx512.mask3.vfmsub.p") ||
4190 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4191 Name.starts_with("avx512.maskz.vfmadd.p")) {
4192 bool IsMask3 = Name[11] == '3';
4193 bool IsMaskZ = Name[11] == 'z';
4194 // Drop the "avx512.mask." prefix to make the indexing below easier.
4195 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4196 bool NegMul = Name[2] == 'n';
4197 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4198
4199 Value *A = CI->getArgOperand(0);
4200 Value *B = CI->getArgOperand(1);
4201 Value *C = CI->getArgOperand(2);
4202
4203 if (NegMul && (IsMask3 || IsMaskZ))
4204 A = Builder.CreateFNeg(A);
4205 if (NegMul && !(IsMask3 || IsMaskZ))
4206 B = Builder.CreateFNeg(B);
4207 if (NegAcc)
4208 C = Builder.CreateFNeg(C);
4209
4210 if (CI->arg_size() == 5 &&
4211 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4212 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4213 Intrinsic::ID IID;
4214 // Check the character before ".512" in the string.
4215 if (Name[Name.size() - 5] == 's')
4216 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4217 else
4218 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4219
4220 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4221 } else {
4222 Rep = Builder.CreateFMA(A, B, C);
4223 }
4224
4225 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4226 : IsMask3 ? CI->getArgOperand(2)
4227 : CI->getArgOperand(0);
4228
4229 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4230 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4231 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4232 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4233 Intrinsic::ID IID;
4234 if (VecWidth == 128 && EltWidth == 32)
4235 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4236 else if (VecWidth == 256 && EltWidth == 32)
4237 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4238 else if (VecWidth == 128 && EltWidth == 64)
4239 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4240 else if (VecWidth == 256 && EltWidth == 64)
4241 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4242 else
4243 llvm_unreachable("Unexpected intrinsic");
4244
4245 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4246 CI->getArgOperand(2)};
4247 Ops[2] = Builder.CreateFNeg(Ops[2]);
4248 Rep = Builder.CreateIntrinsic(IID, Ops);
4249 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4250 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4251 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4252 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4253 bool IsMask3 = Name[11] == '3';
4254 bool IsMaskZ = Name[11] == 'z';
4255 // Drop the "avx512.mask." prefix to make the indexing below easier.
4256 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4257 bool IsSubAdd = Name[3] == 's';
4258 if (CI->arg_size() == 5) {
4259 Intrinsic::ID IID;
4260 // Check the character before ".512" in the string.
4261 if (Name[Name.size() - 5] == 's')
4262 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4263 else
4264 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4265
4266 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4267 CI->getArgOperand(2), CI->getArgOperand(4)};
4268 if (IsSubAdd)
4269 Ops[2] = Builder.CreateFNeg(Ops[2]);
4270
4271 Rep = Builder.CreateIntrinsic(IID, Ops);
4272 } else {
4273 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4274
4275 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4276 CI->getArgOperand(2)};
4277
4278 Function *FMA = Intrinsic::getOrInsertDeclaration(
4279 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4280 Value *Odd = Builder.CreateCall(FMA, Ops);
4281 Ops[2] = Builder.CreateFNeg(Ops[2]);
4282 Value *Even = Builder.CreateCall(FMA, Ops);
4283
4284 if (IsSubAdd)
4285 std::swap(Even, Odd);
4286
4287 SmallVector<int, 32> Idxs(NumElts);
4288 for (int i = 0; i != NumElts; ++i)
4289 Idxs[i] = i + (i % 2) * NumElts;
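// Even result lanes take Even[i] and odd lanes take Odd[i]; the mask is
// {0, N+1, 2, N+3, ...} for an N-element vector.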
4290
4291 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4292 }
4293
4294 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4295 : IsMask3 ? CI->getArgOperand(2)
4296 : CI->getArgOperand(0);
4297
4298 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4299 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4300 Name.starts_with("avx512.maskz.pternlog.")) {
4301 bool ZeroMask = Name[11] == 'z';
4302 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4303 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4304 Intrinsic::ID IID;
4305 if (VecWidth == 128 && EltWidth == 32)
4306 IID = Intrinsic::x86_avx512_pternlog_d_128;
4307 else if (VecWidth == 256 && EltWidth == 32)
4308 IID = Intrinsic::x86_avx512_pternlog_d_256;
4309 else if (VecWidth == 512 && EltWidth == 32)
4310 IID = Intrinsic::x86_avx512_pternlog_d_512;
4311 else if (VecWidth == 128 && EltWidth == 64)
4312 IID = Intrinsic::x86_avx512_pternlog_q_128;
4313 else if (VecWidth == 256 && EltWidth == 64)
4314 IID = Intrinsic::x86_avx512_pternlog_q_256;
4315 else if (VecWidth == 512 && EltWidth == 64)
4316 IID = Intrinsic::x86_avx512_pternlog_q_512;
4317 else
4318 llvm_unreachable("Unexpected intrinsic");
4319
4320 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4321 CI->getArgOperand(2), CI->getArgOperand(3)};
4322 Rep = Builder.CreateIntrinsic(IID, Args);
4323 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4324 : CI->getArgOperand(0);
4325 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4326 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4327 Name.starts_with("avx512.maskz.vpmadd52")) {
4328 bool ZeroMask = Name[11] == 'z';
4329 bool High = Name[20] == 'h' || Name[21] == 'h';
4330 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4331 Intrinsic::ID IID;
4332 if (VecWidth == 128 && !High)
4333 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4334 else if (VecWidth == 256 && !High)
4335 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4336 else if (VecWidth == 512 && !High)
4337 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4338 else if (VecWidth == 128 && High)
4339 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4340 else if (VecWidth == 256 && High)
4341 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4342 else if (VecWidth == 512 && High)
4343 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4344 else
4345 llvm_unreachable("Unexpected intrinsic");
4346
4347 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4348 CI->getArgOperand(2)};
4349 Rep = Builder.CreateIntrinsic(IID, Args);
4350 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4351 : CI->getArgOperand(0);
4352 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4353 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4354 Name.starts_with("avx512.mask.vpermt2var.") ||
4355 Name.starts_with("avx512.maskz.vpermt2var.")) {
4356 bool ZeroMask = Name[11] == 'z';
4357 bool IndexForm = Name[17] == 'i';
4358 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4359 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4360 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4361 Name.starts_with("avx512.mask.vpdpbusds.") ||
4362 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4363 bool ZeroMask = Name[11] == 'z';
4364 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4365 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4366 Intrinsic::ID IID;
4367 if (VecWidth == 128 && !IsSaturating)
4368 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4369 else if (VecWidth == 256 && !IsSaturating)
4370 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4371 else if (VecWidth == 512 && !IsSaturating)
4372 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4373 else if (VecWidth == 128 && IsSaturating)
4374 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4375 else if (VecWidth == 256 && IsSaturating)
4376 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4377 else if (VecWidth == 512 && IsSaturating)
4378 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4379 else
4380 llvm_unreachable("Unexpected intrinsic");
4381
4382 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4383 CI->getArgOperand(2)};
4384
4385 // Input argument types were incorrectly set to vectors of i32 before, but
4386 // they should be vectors of i8. Insert a bitcast when encountering the old
4387 // types.
4388 if (Args[1]->getType()->isVectorTy() &&
4389 cast<VectorType>(Args[1]->getType())
4390 ->getElementType()
4391 ->isIntegerTy(32) &&
4392 Args[2]->getType()->isVectorTy() &&
4393 cast<VectorType>(Args[2]->getType())
4394 ->getElementType()
4395 ->isIntegerTy(32)) {
4396 Type *NewArgType = nullptr;
4397 if (VecWidth == 128)
4398 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4399 else if (VecWidth == 256)
4400 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4401 else if (VecWidth == 512)
4402 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4403 else
4404 llvm_unreachable("Unexpected vector bit width");
4405
4406 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4407 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4408 }
4409
4410 Rep = Builder.CreateIntrinsic(IID, Args);
4411 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4412 : CI->getArgOperand(0);
4413 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4414 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4415 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4416 Name.starts_with("avx512.mask.vpdpwssds.") ||
4417 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4418 bool ZeroMask = Name[11] == 'z';
4419 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4420 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4421 Intrinsic::ID IID;
4422 if (VecWidth == 128 && !IsSaturating)
4423 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4424 else if (VecWidth == 256 && !IsSaturating)
4425 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4426 else if (VecWidth == 512 && !IsSaturating)
4427 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4428 else if (VecWidth == 128 && IsSaturating)
4429 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4430 else if (VecWidth == 256 && IsSaturating)
4431 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4432 else if (VecWidth == 512 && IsSaturating)
4433 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4434 else
4435 llvm_unreachable("Unexpected intrinsic");
4436
4437 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4438 CI->getArgOperand(2)};
4439
4440 // The input argument types were incorrectly set to vectors of i32 before,
4441 // but they should be vectors of i16. Insert a bitcast when encountering the
4442 // old types.
4443 if (Args[1]->getType()->isVectorTy() &&
4444 cast<VectorType>(Args[1]->getType())
4445 ->getElementType()
4446 ->isIntegerTy(32) &&
4447 Args[2]->getType()->isVectorTy() &&
4448 cast<VectorType>(Args[2]->getType())
4449 ->getElementType()
4450 ->isIntegerTy(32)) {
4451 Type *NewArgType = nullptr;
4452 if (VecWidth == 128)
4453 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4454 else if (VecWidth == 256)
4455 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4456 else if (VecWidth == 512)
4457 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4458 else
4459 llvm_unreachable("Unexpected vector bit width");
4460
4461 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4462 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4463 }
4464
4465 Rep = Builder.CreateIntrinsic(IID, Args);
4466 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4467 : CI->getArgOperand(0);
4468 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4469 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4470 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4471 Name == "subborrow.u32" || Name == "subborrow.u64") {
4472 Intrinsic::ID IID;
4473 if (Name[0] == 'a' && Name.back() == '2')
4474 IID = Intrinsic::x86_addcarry_32;
4475 else if (Name[0] == 'a' && Name.back() == '4')
4476 IID = Intrinsic::x86_addcarry_64;
4477 else if (Name[0] == 's' && Name.back() == '2')
4478 IID = Intrinsic::x86_subborrow_32;
4479 else if (Name[0] == 's' && Name.back() == '4')
4480 IID = Intrinsic::x86_subborrow_64;
4481 else
4482 llvm_unreachable("Unexpected intrinsic");
4483
4484 // Make a call with 3 operands.
4485 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4486 CI->getArgOperand(2)};
4487 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4488
4489 // Extract the second result and store it.
4490 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4491 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4492 // Replace the original call result with the first result of the new call.
4493 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4494
4495 CI->replaceAllUsesWith(CF);
4496 Rep = nullptr;
4497 } else if (Name.starts_with("avx512.mask.") &&
4498 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4499 // Rep will be updated by the call in the condition.
4500 }
4501
4502 return Rep;
4503}
4504
4505static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4506 Function *F, IRBuilder<> &Builder) {
4507 if (Name.starts_with("neon.bfcvt")) {
4508 if (Name.starts_with("neon.bfcvtn2")) {
4509 SmallVector<int, 32> LoMask(4);
4510 std::iota(LoMask.begin(), LoMask.end(), 0);
4511 SmallVector<int, 32> ConcatMask(8);
4512 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4513 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4514 Value *Trunc =
4515 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4516 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4517 } else if (Name.starts_with("neon.bfcvtn")) {
4518 SmallVector<int, 32> ConcatMask(8);
4519 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4520 Type *V4BF16 =
4521 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4522 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4524 return Builder.CreateShuffleVector(
4525 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4526 } else {
4527 return Builder.CreateFPTrunc(CI->getOperand(0),
4528 Type::getBFloatTy(F->getContext()));
4529 }
4530 } else if (Name.starts_with("sve.fcvt")) {
4531 Intrinsic::ID NewID =
4532 StringSwitch<Intrinsic::ID>(Name)
4533 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4534 .Case("sve.fcvtnt.bf16f32",
4535 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4536 .Default(Intrinsic::not_intrinsic);
4537 if (NewID == Intrinsic::not_intrinsic)
4538 llvm_unreachable("Unhandled Intrinsic!");
4539
4540 SmallVector<Value *, 3> Args(CI->args());
4541
4542 // The original intrinsics incorrectly used a predicate based on the
4543 // smallest element type rather than the largest.
4544 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4545 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4546
4547 if (Args[1]->getType() != BadPredTy)
4548 llvm_unreachable("Unexpected predicate type!");
4549
4550 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4551 BadPredTy, Args[1]);
4552 Args[1] = Builder.CreateIntrinsic(
4553 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4554
4555 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4556 CI->getName());
4557 }
4558
4559 llvm_unreachable("Unhandled Intrinsic!");
4560}
4561
4562static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4563 IRBuilder<> &Builder) {
4564 if (Name == "mve.vctp64.old") {
4565 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4566 // correct type.
4567 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4568 CI->getArgOperand(0),
4569 /*FMFSource=*/nullptr, CI->getName());
4570 Value *C1 = Builder.CreateIntrinsic(
4571 Intrinsic::arm_mve_pred_v2i,
4572 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4573 return Builder.CreateIntrinsic(
4574 Intrinsic::arm_mve_pred_i2v,
4575 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4576 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4577 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4578 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4579 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4580 Name ==
4581 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4582 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4583 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4584 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4585 Name ==
4586 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4587 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4588 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4589 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4590 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4591 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4592 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4593 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4594 std::vector<Type *> Tys;
4595 unsigned ID = CI->getIntrinsicID();
4596 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4597 switch (ID) {
4598 case Intrinsic::arm_mve_mull_int_predicated:
4599 case Intrinsic::arm_mve_vqdmull_predicated:
4600 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4601 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4602 break;
4603 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4604 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4605 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4606 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4607 V2I1Ty};
4608 break;
4609 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4610 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4611 CI->getOperand(1)->getType(), V2I1Ty};
4612 break;
4613 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4614 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4615 CI->getOperand(2)->getType(), V2I1Ty};
4616 break;
4617 case Intrinsic::arm_cde_vcx1q_predicated:
4618 case Intrinsic::arm_cde_vcx1qa_predicated:
4619 case Intrinsic::arm_cde_vcx2q_predicated:
4620 case Intrinsic::arm_cde_vcx2qa_predicated:
4621 case Intrinsic::arm_cde_vcx3q_predicated:
4622 case Intrinsic::arm_cde_vcx3qa_predicated:
4623 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4624 break;
4625 default:
4626 llvm_unreachable("Unhandled Intrinsic!");
4627 }
4628
4629 std::vector<Value *> Ops;
4630 for (Value *Op : CI->args()) {
4631 Type *Ty = Op->getType();
4632 if (Ty->getScalarSizeInBits() == 1) {
4633 Value *C1 = Builder.CreateIntrinsic(
4634 Intrinsic::arm_mve_pred_v2i,
4635 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4636 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4637 }
4638 Ops.push_back(Op);
4639 }
4640
4641 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4642 CI->getName());
4643 }
4644 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4645}
4646
4647// These are expected to have the arguments:
4648// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4649//
4650// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4651//
4652static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4653 Function *F, IRBuilder<> &Builder) {
4654 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4655 // for compatibility.
4656 auto UpgradeLegacyWMMAIUIntrinsicCall =
4657 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4658 ArrayRef<Type *> OverloadTys) -> Value * {
4659 // Prepare arguments, append clamp=0 for compatibility
4660 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4661 Args.push_back(Builder.getFalse());
4662
4663 // Insert the declaration for the right overload types
4664 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
4665 F->getParent(), F->getIntrinsicID(), OverloadTys);
4666
4667 // Copy operand bundles if any
4668 SmallVector<OperandBundleDef, 1> Bundles;
4669 CI->getOperandBundlesAsDefs(Bundles);
4670
4671 // Create the new call and copy calling properties
4672 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4673 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4674 NewCall->setCallingConv(CI->getCallingConv());
4675 NewCall->setAttributes(CI->getAttributes());
4676 NewCall->setDebugLoc(CI->getDebugLoc());
4677 NewCall->copyMetadata(*CI);
4678 return NewCall;
4679 };
4680
4681 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4682 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4683 "intrinsic should have 7 arguments");
4684 Type *T1 = CI->getArgOperand(4)->getType();
4685 Type *T2 = CI->getArgOperand(1)->getType();
4686 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4687 }
4688 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4689 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4690 "intrinsic should have 8 arguments");
4691 Type *T1 = CI->getArgOperand(4)->getType();
4692 Type *T2 = CI->getArgOperand(1)->getType();
4693 Type *T3 = CI->getArgOperand(3)->getType();
4694 Type *T4 = CI->getArgOperand(5)->getType();
4695 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4696 }
4697
4698 AtomicRMWInst::BinOp RMWOp =
4700 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4701 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4702 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4703 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4704 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4705 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4706 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4707 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4708 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4709 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4710 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4711 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4712 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4713
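 // From here on the legacy intrinsic call is rebuilt as a native atomicrmw
 // instruction, e.g. llvm.amdgcn.ds.fadd(p, v, order, scope, volatile)
 // becomes roughly: atomicrmw fadd ptr %p, float %v syncscope("agent") <order>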
4714 unsigned NumOperands = CI->getNumOperands();
4715 if (NumOperands < 3) // Malformed bitcode.
4716 return nullptr;
4717
4718 Value *Ptr = CI->getArgOperand(0);
4719 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4720 if (!PtrTy) // Malformed.
4721 return nullptr;
4722
4723 Value *Val = CI->getArgOperand(1);
4724 if (Val->getType() != CI->getType()) // Malformed.
4725 return nullptr;
4726
4727 ConstantInt *OrderArg = nullptr;
4728 bool IsVolatile = false;
4729
4730 // These should have 5 arguments (plus the callee). A separate version of the
4731 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4732 if (NumOperands > 3)
4733 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4734
4735 // Ignore scope argument at 3
4736
4737 if (NumOperands > 5) {
4738 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4739 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4740 }
4741
4742 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4743 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4744 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4745 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4746 Order = AtomicOrdering::SequentiallyConsistent;
4747
4748 LLVMContext &Ctx = F->getContext();
4749
4750 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4751 Type *RetTy = CI->getType();
4752 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4753 if (VT->getElementType()->isIntegerTy(16)) {
4754 VectorType *AsBF16 =
4755 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4756 Val = Builder.CreateBitCast(Val, AsBF16);
4757 }
4758 }
4759
4760 // The scope argument never really worked correctly. Use agent as the most
4761 // conservative option which should still always produce the instruction.
4762 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4763 AtomicRMWInst *RMW =
4764 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4765
4766 unsigned AddrSpace = PtrTy->getAddressSpace();
4767 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4768 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4769 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4770 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4771 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4772 }
4773
4774 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4775 MDBuilder MDB(F->getContext());
4776 MDNode *RangeNotPrivate =
4777 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4778 APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4779 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4780 }
4781
4782 if (IsVolatile)
4783 RMW->setVolatile(true);
4784
4785 return Builder.CreateBitCast(RMW, RetTy);
4786}
4787
4788/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4789/// plain MDNode, as it's the verifier's job to check these are the correct
4790/// types later.
4791static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4792 if (Op < CI->arg_size()) {
4793 if (MetadataAsValue *MAV =
4794 dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) {
4795 Metadata *MD = MAV->getMetadata();
4796 return dyn_cast_if_present<MDNode>(MD);
4797 }
4798 }
4799 return nullptr;
4800}
4801
4802 /// Helper to unwrap MetadataAsValue operands that wrap plain Metadata, such as the Value field.
4803static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4804 if (Op < CI->arg_size())
4805 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4806 return MAV->getMetadata();
4807 return nullptr;
4808}
4809
4810static MDNode *getDebugLocSafe(const Instruction *I) {
4811 // The MDNode attached to this instruction might not be the correct type,
4812 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4813 return I->getDebugLoc().getAsMDNode();
4814}
4815
4816/// Convert debug intrinsic calls to non-instruction debug records.
4817/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4818/// \p CI - The debug intrinsic call.
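/// For example, a call such as
///   call void @llvm.dbg.value(metadata i32 %x, metadata !10, metadata !DIExpression())
/// is replaced by an equivalent #dbg_value record attached to the block.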
4819static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4820 DbgRecord *DR = nullptr;
4821 if (Name == "label") {
4822 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
4823 CI->getDebugLoc());
4824 } else if (Name == "assign") {
4825 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4826 DbgVariableRecord::LocationType::Assign, unwrapMAVMetadataOp(CI, 0),
4827 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4828 unwrapMAVMetadataOp(CI, 4),
4829 /* The address is a Value ref; it will be stored as Metadata. */
4830 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4831 } else if (Name == "declare") {
4832 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4833 DbgVariableRecord::LocationType::Declare, unwrapMAVMetadataOp(CI, 0),
4834 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4835 getDebugLocSafe(CI));
4836 } else if (Name == "addr") {
4837 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4838 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4839 // Don't try to add something to the expression if it's not an expression.
4840 // Instead, allow the verifier to fail later.
4841 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4842 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4843 }
4844 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4845 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4846 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4847 getDebugLocSafe(CI));
4848 } else if (Name == "value") {
4849 // An old version of dbg.value had an extra offset argument.
4850 unsigned VarOp = 1;
4851 unsigned ExprOp = 2;
4852 if (CI->arg_size() == 4) {
4853 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4854 // Nonzero offset dbg.values get dropped without a replacement.
4855 if (!Offset || !Offset->isZeroValue())
4856 return;
4857 VarOp = 2;
4858 ExprOp = 3;
4859 }
4860 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4861 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4862 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4863 nullptr, getDebugLocSafe(CI));
4864 }
4865 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4866 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4867}
4868
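// Upgrade llvm.vector.splice / llvm.experimental.vector.splice with a signed
// immediate into vector.splice.left (offset >= 0) or vector.splice.right
// (offset < 0), which take the absolute offset instead.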
4869static Value *upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder) {
4870 auto *Offset = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4871 if (!Offset)
4872 reportFatalUsageError("Invalid llvm.vector.splice offset argument");
4873 int64_t OffsetVal = Offset->getSExtValue();
4874 return Builder.CreateIntrinsic(OffsetVal >= 0
4875 ? Intrinsic::vector_splice_left
4876 : Intrinsic::vector_splice_right,
4877 CI->getType(),
4878 {CI->getArgOperand(0), CI->getArgOperand(1),
4879 Builder.getInt32(std::abs(OffsetVal))});
4880}
4881
4882static Value *upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI,
4883 Function *F, IRBuilder<> &Builder) {
4884 if (Name.starts_with("to.fp16")) {
4885 Value *Cast =
4886 Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
4887 return Builder.CreateBitCast(Cast, CI->getType());
4888 }
4889
4890 if (Name.starts_with("from.fp16")) {
4891 Value *Cast =
4892 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
4893 return Builder.CreateFPExt(Cast, CI->getType());
4894 }
4895
4896 return nullptr;
4897}
4898
4899/// Upgrade a call to an old intrinsic. All argument and return casting must be
4900/// provided to seamlessly integrate with existing context.
4901void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4902 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4903 // checks the callee's function type matches. It's likely we need to handle
4904 // type changes here.
4905 auto *F = dyn_cast<Function>(CI->getCalledOperand());
4906 if (!F)
4907 return;
4908
4909 LLVMContext &C = CI->getContext();
4910 IRBuilder<> Builder(C);
4911 if (isa<FPMathOperator>(CI))
4912 Builder.setFastMathFlags(CI->getFastMathFlags());
4913 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4914
4915 if (!NewFn) {
4916 // Get the Function's name.
4917 StringRef Name = F->getName();
4918 if (!Name.consume_front("llvm."))
4919 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
4920
4921 bool IsX86 = Name.consume_front("x86.");
4922 bool IsNVVM = Name.consume_front("nvvm.");
4923 bool IsAArch64 = Name.consume_front("aarch64.");
4924 bool IsARM = Name.consume_front("arm.");
4925 bool IsAMDGCN = Name.consume_front("amdgcn.");
4926 bool IsDbg = Name.consume_front("dbg.");
4927 bool IsOldSplice =
4928 (Name.consume_front("experimental.vector.splice") ||
4929 Name.consume_front("vector.splice")) &&
4930 !(Name.starts_with(".left") || Name.starts_with(".right"));
4931 Value *Rep = nullptr;
4932
4933 if (!IsX86 && Name == "stackprotectorcheck") {
4934 Rep = nullptr;
4935 } else if (IsNVVM) {
4936 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4937 } else if (IsX86) {
4938 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4939 } else if (IsAArch64) {
4940 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4941 } else if (IsARM) {
4942 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4943 } else if (IsAMDGCN) {
4944 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4945 } else if (IsDbg) {
4946 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4947 } else if (IsOldSplice) {
4948 Rep = upgradeVectorSplice(CI, Builder);
4949 } else if (Name.consume_front("convert.")) {
4950 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
4951 } else {
4952 llvm_unreachable("Unknown function for CallBase upgrade.");
4953 }
4954
4955 if (Rep)
4956 CI->replaceAllUsesWith(Rep);
4957 CI->eraseFromParent();
4958 return;
4959 }
4960
4961 const auto &DefaultCase = [&]() -> void {
4962 if (F == NewFn)
4963 return;
4964
4965 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4966 // Handle generic mangling change.
4967 assert(
4968 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4969 "Unknown function for CallBase upgrade and isn't just a name change");
4970 CI->setCalledFunction(NewFn);
4971 return;
4972 }
4973
4974 // This must be an upgrade from a named to a literal struct.
4975 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4976 assert(OldST != NewFn->getReturnType() &&
4977 "Return type must have changed");
4978 assert(OldST->getNumElements() ==
4979 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4980 "Must have same number of elements");
4981
4982 SmallVector<Value *> Args(CI->args());
4983 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4984 NewCI->setAttributes(CI->getAttributes());
4985 Value *Res = PoisonValue::get(OldST);
4986 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4987 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4988 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4989 }
4990 CI->replaceAllUsesWith(Res);
4991 CI->eraseFromParent();
4992 return;
4993 }
4994
4995 // We're probably about to produce something invalid. Let the verifier catch
4996 // it instead of dying here.
4997 CI->setCalledOperand(
4998 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4999 return;
5000 };
5001 CallInst *NewCall = nullptr;
5002 switch (NewFn->getIntrinsicID()) {
5003 default: {
5004 DefaultCase();
5005 return;
5006 }
5007 case Intrinsic::arm_neon_vst1:
5008 case Intrinsic::arm_neon_vst2:
5009 case Intrinsic::arm_neon_vst3:
5010 case Intrinsic::arm_neon_vst4:
5011 case Intrinsic::arm_neon_vst2lane:
5012 case Intrinsic::arm_neon_vst3lane:
5013 case Intrinsic::arm_neon_vst4lane: {
5014 SmallVector<Value *, 4> Args(CI->args());
5015 NewCall = Builder.CreateCall(NewFn, Args);
5016 break;
5017 }
5018 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5019 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5020 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5021 LLVMContext &Ctx = F->getParent()->getContext();
5022 SmallVector<Value *, 4> Args(CI->args());
5023 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
5024 cast<ConstantInt>(Args[3])->getZExtValue());
5025 NewCall = Builder.CreateCall(NewFn, Args);
5026 break;
5027 }
5028 case Intrinsic::aarch64_sve_ld3_sret:
5029 case Intrinsic::aarch64_sve_ld4_sret:
5030 case Intrinsic::aarch64_sve_ld2_sret: {
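 // The sret forms used to return a single wide scalable vector; the new
 // intrinsics return a struct of N subvectors, so extract each field and
 // reassemble the wide value with insertvector below.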
5031 StringRef Name = F->getName();
5032 Name = Name.substr(5);
5033 unsigned N = StringSwitch<unsigned>(Name)
5034 .StartsWith("aarch64.sve.ld2", 2)
5035 .StartsWith("aarch64.sve.ld3", 3)
5036 .StartsWith("aarch64.sve.ld4", 4)
5037 .Default(0);
5038 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5039 unsigned MinElts = RetTy->getMinNumElements() / N;
5040 SmallVector<Value *, 2> Args(CI->args());
5041 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5042 Value *Ret = llvm::PoisonValue::get(RetTy);
5043 for (unsigned I = 0; I < N; I++) {
5044 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5045 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5046 }
5047 NewCall = dyn_cast<CallInst>(Ret);
5048 break;
5049 }
5050
5051 case Intrinsic::coro_end: {
5052 SmallVector<Value *, 3> Args(CI->args());
5053 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5054 NewCall = Builder.CreateCall(NewFn, Args);
5055 break;
5056 }
5057
5058 case Intrinsic::vector_extract: {
5059 StringRef Name = F->getName();
5060 Name = Name.substr(5); // Strip llvm
5061 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5062 DefaultCase();
5063 return;
5064 }
5065 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5066 unsigned MinElts = RetTy->getMinNumElements();
5067 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5068 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5069 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5070 break;
5071 }
5072
5073 case Intrinsic::vector_insert: {
5074 StringRef Name = F->getName();
5075 Name = Name.substr(5);
5076 if (!Name.starts_with("aarch64.sve.tuple")) {
5077 DefaultCase();
5078 return;
5079 }
5080 if (Name.starts_with("aarch64.sve.tuple.set")) {
5081 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5082 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5083 Value *NewIdx =
5084 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5085 NewCall = Builder.CreateCall(
5086 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5087 break;
5088 }
5089 if (Name.starts_with("aarch64.sve.tuple.create")) {
5090 unsigned N = StringSwitch<unsigned>(Name)
5091 .StartsWith("aarch64.sve.tuple.create2", 2)
5092 .StartsWith("aarch64.sve.tuple.create3", 3)
5093 .StartsWith("aarch64.sve.tuple.create4", 4)
5094 .Default(0);
5095 assert(N > 1 && "Create is expected to be between 2-4");
5096 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5097 Value *Ret = llvm::PoisonValue::get(RetTy);
5098 unsigned MinElts = RetTy->getMinNumElements() / N;
5099 for (unsigned I = 0; I < N; I++) {
5100 Value *V = CI->getArgOperand(I);
5101 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5102 }
5103 NewCall = dyn_cast<CallInst>(Ret);
5104 }
5105 break;
5106 }
5107
5108 case Intrinsic::arm_neon_bfdot:
5109 case Intrinsic::arm_neon_bfmmla:
5110 case Intrinsic::arm_neon_bfmlalb:
5111 case Intrinsic::arm_neon_bfmlalt:
5112 case Intrinsic::aarch64_neon_bfdot:
5113 case Intrinsic::aarch64_neon_bfmmla:
5114 case Intrinsic::aarch64_neon_bfmlalb:
5115 case Intrinsic::aarch64_neon_bfmlalt: {
5116 SmallVector<Value *, 3> Args;
5117 assert(CI->arg_size() == 3 &&
5118 "Mismatch between function args and call args");
5119 size_t OperandWidth =
5120 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
5121 assert((OperandWidth == 64 || OperandWidth == 128) &&
5122 "Unexpected operand width");
5123 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5124 auto Iter = CI->args().begin();
5125 Args.push_back(*Iter++);
5126 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5127 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5128 NewCall = Builder.CreateCall(NewFn, Args);
5129 break;
5130 }
5131
5132 case Intrinsic::bitreverse:
5133 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5134 break;
5135
5136 case Intrinsic::ctlz:
5137 case Intrinsic::cttz: {
5138 if (CI->arg_size() != 1) {
5139 DefaultCase();
5140 return;
5141 }
5142
5143 NewCall =
5144 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5145 break;
5146 }
5147
5148 case Intrinsic::objectsize: {
5149 Value *NullIsUnknownSize =
5150 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5151 Value *Dynamic =
5152 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5153 NewCall = Builder.CreateCall(
5154 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5155 break;
5156 }
5157
5158 case Intrinsic::ctpop:
5159 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5160 break;
5161 case Intrinsic::dbg_value: {
5162 StringRef Name = F->getName();
5163 Name = Name.substr(5); // Strip llvm.
5164 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5165 if (Name.starts_with("dbg.addr")) {
5166 DIExpression *Expr = cast<DIExpression>(
5167 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5168 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5169 NewCall =
5170 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5171 MetadataAsValue::get(C, Expr)});
5172 break;
5173 }
5174
5175 // Upgrade from the old version that had an extra offset argument.
5176 assert(CI->arg_size() == 4);
5177 // Drop nonzero offsets instead of attempting to upgrade them.
5178 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
5179 if (Offset->isZeroValue()) {
5180 NewCall = Builder.CreateCall(
5181 NewFn,
5182 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5183 break;
5184 }
5185 CI->eraseFromParent();
5186 return;
5187 }
5188
5189 case Intrinsic::ptr_annotation:
5190 // Upgrade from versions that lacked the annotation attribute argument.
5191 if (CI->arg_size() != 4) {
5192 DefaultCase();
5193 return;
5194 }
5195
5196 // Create a new call with an added null annotation attribute argument.
5197 NewCall = Builder.CreateCall(
5198 NewFn,
5199 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5200 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5201 NewCall->takeName(CI);
5202 CI->replaceAllUsesWith(NewCall);
5203 CI->eraseFromParent();
5204 return;
5205
5206 case Intrinsic::var_annotation:
5207 // Upgrade from versions that lacked the annotation attribute argument.
5208 if (CI->arg_size() != 4) {
5209 DefaultCase();
5210 return;
5211 }
5212 // Create a new call with an added null annotation attribute argument.
5213 NewCall = Builder.CreateCall(
5214 NewFn,
5215 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5216 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5217 NewCall->takeName(CI);
5218 CI->replaceAllUsesWith(NewCall);
5219 CI->eraseFromParent();
5220 return;
5221
5222 case Intrinsic::riscv_aes32dsi:
5223 case Intrinsic::riscv_aes32dsmi:
5224 case Intrinsic::riscv_aes32esi:
5225 case Intrinsic::riscv_aes32esmi:
5226 case Intrinsic::riscv_sm4ks:
5227 case Intrinsic::riscv_sm4ed: {
5228 // The last argument to these intrinsics used to be i8 and changed to i32.
5229 // The type overload for sm4ks and sm4ed was removed.
5230 Value *Arg2 = CI->getArgOperand(2);
5231 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5232 return;
5233
5234 Value *Arg0 = CI->getArgOperand(0);
5235 Value *Arg1 = CI->getArgOperand(1);
5236 if (CI->getType()->isIntegerTy(64)) {
5237 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5238 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5239 }
5240
5241 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5242 cast<ConstantInt>(Arg2)->getZExtValue());
5243
5244 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5245 Value *Res = NewCall;
5246 if (Res->getType() != CI->getType())
5247 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5248 NewCall->takeName(CI);
5249 CI->replaceAllUsesWith(Res);
5250 CI->eraseFromParent();
5251 return;
5252 }
5253 case Intrinsic::nvvm_mapa_shared_cluster: {
5254 // Create a new call with the correct address space.
5255 NewCall =
5256 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5257 Value *Res = NewCall;
5258 Res = Builder.CreateAddrSpaceCast(
5259 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5260 NewCall->takeName(CI);
5261 CI->replaceAllUsesWith(Res);
5262 CI->eraseFromParent();
5263 return;
5264 }
5265 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5266 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5267 // Create a new call with the correct address space.
5268 SmallVector<Value *, 4> Args(CI->args());
5269 Args[0] = Builder.CreateAddrSpaceCast(
5270 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5271
5272 NewCall = Builder.CreateCall(NewFn, Args);
5273 NewCall->takeName(CI);
5274 CI->replaceAllUsesWith(NewCall);
5275 CI->eraseFromParent();
5276 return;
5277 }
5278 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5279 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5280 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5281 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5282 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5283 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5284 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5285 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5286 SmallVector<Value *, 16> Args(CI->args());
5287
5288 // Create AddrSpaceCast to shared_cluster if needed.
5289 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5290 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5291 if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
5292 Args[0] = Builder.CreateAddrSpaceCast(
5293 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5294
5295 // Attach the flag argument for cta_group, with a
5296 // default value of 0. This handles case (2) in
5297 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5298 size_t NumArgs = CI->arg_size();
5299 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5300 if (!FlagArg->getType()->isIntegerTy(1))
5301 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5302
5303 NewCall = Builder.CreateCall(NewFn, Args);
5304 NewCall->takeName(CI);
5305 CI->replaceAllUsesWith(NewCall);
5306 CI->eraseFromParent();
5307 return;
5308 }
5309 case Intrinsic::riscv_sha256sig0:
5310 case Intrinsic::riscv_sha256sig1:
5311 case Intrinsic::riscv_sha256sum0:
5312 case Intrinsic::riscv_sha256sum1:
5313 case Intrinsic::riscv_sm3p0:
5314 case Intrinsic::riscv_sm3p1: {
5315 // The last argument to these intrinsics used to be i8 and changed to i32.
5316 // The type overload for sm4ks and sm4ed was removed.
5317 if (!CI->getType()->isIntegerTy(64))
5318 return;
5319
5320 Value *Arg =
5321 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5322
5323 NewCall = Builder.CreateCall(NewFn, Arg);
5324 Value *Res =
5325 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5326 NewCall->takeName(CI);
5327 CI->replaceAllUsesWith(Res);
5328 CI->eraseFromParent();
5329 return;
5330 }
5331
5332 case Intrinsic::x86_xop_vfrcz_ss:
5333 case Intrinsic::x86_xop_vfrcz_sd:
5334 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5335 break;
5336
5337 case Intrinsic::x86_xop_vpermil2pd:
5338 case Intrinsic::x86_xop_vpermil2ps:
5339 case Intrinsic::x86_xop_vpermil2pd_256:
5340 case Intrinsic::x86_xop_vpermil2ps_256: {
5341 SmallVector<Value *, 4> Args(CI->args());
5342 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5343 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5344 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5345 NewCall = Builder.CreateCall(NewFn, Args);
5346 break;
5347 }
5348
5349 case Intrinsic::x86_sse41_ptestc:
5350 case Intrinsic::x86_sse41_ptestz:
5351 case Intrinsic::x86_sse41_ptestnzc: {
5352 // The arguments for these intrinsics used to be v4f32, and changed
5353 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5354 // So, the only thing required is a bitcast for both arguments.
5355 // First, check the arguments have the old type.
5356 Value *Arg0 = CI->getArgOperand(0);
5357 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5358 return;
5359
5360 // Old intrinsic, add bitcasts
5361 Value *Arg1 = CI->getArgOperand(1);
5362
5363 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5364
5365 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5366 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5367
5368 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5369 break;
5370 }
5371
5372 case Intrinsic::x86_rdtscp: {
5373 // This used to take one argument. If we have no arguments, it is already
5374 // upgraded.
5375 if (CI->getNumOperands() == 0)
5376 return;
5377
5378 NewCall = Builder.CreateCall(NewFn);
5379 // Extract the second result and store it.
5380 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5381 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5382 // Replace the original call result with the first result of the new call.
5383 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5384
5385 NewCall->takeName(CI);
5386 CI->replaceAllUsesWith(TSC);
5387 CI->eraseFromParent();
5388 return;
5389 }
5390
5391 case Intrinsic::x86_sse41_insertps:
5392 case Intrinsic::x86_sse41_dppd:
5393 case Intrinsic::x86_sse41_dpps:
5394 case Intrinsic::x86_sse41_mpsadbw:
5395 case Intrinsic::x86_avx_dp_ps_256:
5396 case Intrinsic::x86_avx2_mpsadbw: {
5397 // Need to truncate the last argument from i32 to i8 -- this argument models
5398 // an inherently 8-bit immediate operand to these x86 instructions.
5399 SmallVector<Value *, 4> Args(CI->args());
5400
5401 // Replace the last argument with a trunc.
5402 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5403 NewCall = Builder.CreateCall(NewFn, Args);
5404 break;
5405 }
5406
5407 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5408 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5409 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5410 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5411 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5412 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5413 SmallVector<Value *, 4> Args(CI->args());
5414 unsigned NumElts =
5415 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5416 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5417
5418 NewCall = Builder.CreateCall(NewFn, Args);
5419 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5420
5421 NewCall->takeName(CI);
5422 CI->replaceAllUsesWith(Res);
5423 CI->eraseFromParent();
5424 return;
5425 }
5426
5427 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5428 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5429 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5430 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5431 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5432 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5433 SmallVector<Value *, 4> Args(CI->args());
5434 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5435 if (NewFn->getIntrinsicID() ==
5436 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5437 Args[1] = Builder.CreateBitCast(
5438 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5439
5440 NewCall = Builder.CreateCall(NewFn, Args);
5441 Value *Res = Builder.CreateBitCast(
5442 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5443
5444 NewCall->takeName(CI);
5445 CI->replaceAllUsesWith(Res);
5446 CI->eraseFromParent();
5447 return;
5448 }
5449 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5450 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5451 case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
5452 SmallVector<Value *, 4> Args(CI->args());
5453 unsigned NumElts =
5454 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5455 Args[1] = Builder.CreateBitCast(
5456 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5457 Args[2] = Builder.CreateBitCast(
5458 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5459
5460 NewCall = Builder.CreateCall(NewFn, Args);
5461 break;
5462 }
5463
5464 case Intrinsic::thread_pointer: {
5465 NewCall = Builder.CreateCall(NewFn, {});
5466 break;
5467 }
5468
5469 case Intrinsic::memcpy:
5470 case Intrinsic::memmove:
5471 case Intrinsic::memset: {
5472 // We have to make sure that the call signature is what we're expecting.
5473 // We only want to change the old signatures by removing the alignment arg:
5474 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5475 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5476 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5477 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5478 // Note: i8*'s in the above can be any pointer type
5479 if (CI->arg_size() != 5) {
5480 DefaultCase();
5481 return;
5482 }
5483 // Remove alignment argument (3), and add alignment attributes to the
5484 // dest/src pointers.
5485 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5486 CI->getArgOperand(2), CI->getArgOperand(4)};
5487 NewCall = Builder.CreateCall(NewFn, Args);
5488 AttributeList OldAttrs = CI->getAttributes();
5489 AttributeList NewAttrs = AttributeList::get(
5490 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5491 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5492 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5493 NewCall->setAttributes(NewAttrs);
5494 auto *MemCI = cast<MemIntrinsic>(NewCall);
5495 // All mem intrinsics support dest alignment.
5496 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
5497 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5498 // Memcpy/Memmove also support source alignment.
5499 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5500 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5501 break;
5502 }
5503
5504 case Intrinsic::masked_load:
5505 case Intrinsic::masked_gather:
5506 case Intrinsic::masked_store:
5507 case Intrinsic::masked_scatter: {
5508 if (CI->arg_size() != 4) {
5509 DefaultCase();
5510 return;
5511 }
5512
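 // The old forms passed the alignment as an explicit i32 argument;
 // translate it into the Align parameter of the IRBuilder masked-memory
 // helpers, falling back to the ABI type alignment when the argument is 0
 // (gather/scatter only).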
5513 auto GetMaybeAlign = [](Value *Op) {
5514 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5515 uint64_t Val = CI->getZExtValue();
5516 if (Val == 0)
5517 return MaybeAlign();
5518 if (isPowerOf2_64(Val))
5519 return MaybeAlign(Val);
5520 }
5521 reportFatalUsageError("Invalid alignment argument");
5522 };
5523 auto GetAlign = [&](Value *Op) {
5524 MaybeAlign Align = GetMaybeAlign(Op);
5525 if (Align)
5526 return *Align;
5527 reportFatalUsageError("Invalid zero alignment argument");
5528 };
5529
5530 const DataLayout &DL = CI->getDataLayout();
5531 switch (NewFn->getIntrinsicID()) {
5532 case Intrinsic::masked_load:
5533 NewCall = Builder.CreateMaskedLoad(
5534 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5535 CI->getArgOperand(2), CI->getArgOperand(3));
5536 break;
5537 case Intrinsic::masked_gather:
5538 NewCall = Builder.CreateMaskedGather(
5539 CI->getType(), CI->getArgOperand(0),
5540 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5541 CI->getType()->getScalarType()),
5542 CI->getArgOperand(2), CI->getArgOperand(3));
5543 break;
5544 case Intrinsic::masked_store:
5545 NewCall = Builder.CreateMaskedStore(
5546 CI->getArgOperand(0), CI->getArgOperand(1),
5547 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5548 break;
5549 case Intrinsic::masked_scatter:
5550 NewCall = Builder.CreateMaskedScatter(
5551 CI->getArgOperand(0), CI->getArgOperand(1),
5552 DL.getValueOrABITypeAlignment(
5553 GetMaybeAlign(CI->getArgOperand(2)),
5554 CI->getArgOperand(0)->getType()->getScalarType()),
5555 CI->getArgOperand(3));
5556 break;
5557 default:
5558 llvm_unreachable("Unexpected intrinsic ID");
5559 }
5560 // Previous metadata is still valid.
5561 NewCall->copyMetadata(*CI);
5562 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5563 break;
5564 }
5565
5566 case Intrinsic::lifetime_start:
5567 case Intrinsic::lifetime_end: {
5568 if (CI->arg_size() != 2) {
5569 DefaultCase();
5570 return;
5571 }
5572
5573 Value *Ptr = CI->getArgOperand(1);
5574 // Try to strip pointer casts, such that the lifetime works on an alloca.
5575 Ptr = Ptr->stripPointerCasts();
5576 if (isa<AllocaInst>(Ptr)) {
5577 // Don't use NewFn, as we might have looked through an addrspacecast.
5578 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5579 NewCall = Builder.CreateLifetimeStart(Ptr);
5580 else
5581 NewCall = Builder.CreateLifetimeEnd(Ptr);
5582 break;
5583 }
5584
5585 // Otherwise remove the lifetime marker.
5586 CI->eraseFromParent();
5587 return;
5588 }
5589
5590 case Intrinsic::x86_avx512_vpdpbusd_128:
5591 case Intrinsic::x86_avx512_vpdpbusd_256:
5592 case Intrinsic::x86_avx512_vpdpbusd_512:
5593 case Intrinsic::x86_avx512_vpdpbusds_128:
5594 case Intrinsic::x86_avx512_vpdpbusds_256:
5595 case Intrinsic::x86_avx512_vpdpbusds_512:
5596 case Intrinsic::x86_avx2_vpdpbssd_128:
5597 case Intrinsic::x86_avx2_vpdpbssd_256:
5598 case Intrinsic::x86_avx10_vpdpbssd_512:
5599 case Intrinsic::x86_avx2_vpdpbssds_128:
5600 case Intrinsic::x86_avx2_vpdpbssds_256:
5601 case Intrinsic::x86_avx10_vpdpbssds_512:
5602 case Intrinsic::x86_avx2_vpdpbsud_128:
5603 case Intrinsic::x86_avx2_vpdpbsud_256:
5604 case Intrinsic::x86_avx10_vpdpbsud_512:
5605 case Intrinsic::x86_avx2_vpdpbsuds_128:
5606 case Intrinsic::x86_avx2_vpdpbsuds_256:
5607 case Intrinsic::x86_avx10_vpdpbsuds_512:
5608 case Intrinsic::x86_avx2_vpdpbuud_128:
5609 case Intrinsic::x86_avx2_vpdpbuud_256:
5610 case Intrinsic::x86_avx10_vpdpbuud_512:
5611 case Intrinsic::x86_avx2_vpdpbuuds_128:
5612 case Intrinsic::x86_avx2_vpdpbuuds_256:
5613 case Intrinsic::x86_avx10_vpdpbuuds_512: {
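 // Declaration-level upgrade: operands 1 and 2 were vectors of i32 but are
 // now vectors of i8 with the same total bit width, so bitcast them
 // (e.g. <4 x i32> -> <16 x i8> for the 128-bit forms).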
5614 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5615 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5616 CI->getArgOperand(2)};
5617 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5618 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5619 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5620
5621 NewCall = Builder.CreateCall(NewFn, Args);
5622 break;
5623 }
5624 case Intrinsic::x86_avx512_vpdpwssd_128:
5625 case Intrinsic::x86_avx512_vpdpwssd_256:
5626 case Intrinsic::x86_avx512_vpdpwssd_512:
5627 case Intrinsic::x86_avx512_vpdpwssds_128:
5628 case Intrinsic::x86_avx512_vpdpwssds_256:
5629 case Intrinsic::x86_avx512_vpdpwssds_512:
5630 case Intrinsic::x86_avx2_vpdpwsud_128:
5631 case Intrinsic::x86_avx2_vpdpwsud_256:
5632 case Intrinsic::x86_avx10_vpdpwsud_512:
5633 case Intrinsic::x86_avx2_vpdpwsuds_128:
5634 case Intrinsic::x86_avx2_vpdpwsuds_256:
5635 case Intrinsic::x86_avx10_vpdpwsuds_512:
5636 case Intrinsic::x86_avx2_vpdpwusd_128:
5637 case Intrinsic::x86_avx2_vpdpwusd_256:
5638 case Intrinsic::x86_avx10_vpdpwusd_512:
5639 case Intrinsic::x86_avx2_vpdpwusds_128:
5640 case Intrinsic::x86_avx2_vpdpwusds_256:
5641 case Intrinsic::x86_avx10_vpdpwusds_512:
5642 case Intrinsic::x86_avx2_vpdpwuud_128:
5643 case Intrinsic::x86_avx2_vpdpwuud_256:
5644 case Intrinsic::x86_avx10_vpdpwuud_512:
5645 case Intrinsic::x86_avx2_vpdpwuuds_128:
5646 case Intrinsic::x86_avx2_vpdpwuuds_256:
5647 case Intrinsic::x86_avx10_vpdpwuuds_512: {
5648 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5649 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5650 CI->getArgOperand(2)};
5651 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5652 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5653 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5654
5655 NewCall = Builder.CreateCall(NewFn, Args);
5656 break;
5657 }
5658 assert(NewCall && "Should have either set this variable or returned through "
5659 "the default case");
5660 NewCall->takeName(CI);
5661 CI->replaceAllUsesWith(NewCall);
5662 CI->eraseFromParent();
5663}
5664
5665void llvm::UpgradeCallsToIntrinsic(Function *F) {
5666 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5667
5668 // Check if this function should be upgraded and get the replacement function
5669 // if there is one.
5670 Function *NewFn;
5671 if (UpgradeIntrinsicFunction(F, NewFn)) {
5672 // Replace all users of the old function with the new function or new
5673 // instructions. This is not a range loop because the call is deleted.
5674 for (User *U : make_early_inc_range(F->users()))
5675 if (CallBase *CB = dyn_cast<CallBase>(U))
5676 UpgradeIntrinsicCall(CB, NewFn);
5677
5678 // Remove old function, no longer used, from the module.
5679 if (F != NewFn)
5680 F->eraseFromParent();
5681 }
5682}
5683
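// Convert legacy scalar TBAA tags (!{!"type", !parent[, const]}) into the
// struct-path aware form <Type, Type, offset 0[, const]> expected by current
// consumers; a sketch of the mapping, per the cases below.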
5684MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5685 const unsigned NumOperands = MD.getNumOperands();
5686 if (NumOperands == 0)
5687 return &MD; // Invalid, punt to a verifier error.
5688
5689 // Check if the tag uses struct-path aware TBAA format.
5690 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5691 return &MD;
5692
5693 auto &Context = MD.getContext();
5694 if (NumOperands == 3) {
5695 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5696 MDNode *ScalarType = MDNode::get(Context, Elts);
5697 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5698 Metadata *Elts2[] = {ScalarType, ScalarType,
5699 ConstantAsMetadata::get(
5700 Constant::getNullValue(Type::getInt64Ty(Context))),
5701 MD.getOperand(2)};
5702 return MDNode::get(Context, Elts2);
5703 }
5704 // Create a MDNode <MD, MD, offset 0>
5705 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
5706 Type::getInt64Ty(Context)))};
5707 return MDNode::get(Context, Elts);
5708}
5709
5710Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5711 Instruction *&Temp) {
5712 if (Opc != Instruction::BitCast)
5713 return nullptr;
5714
5715 Temp = nullptr;
5716 Type *SrcTy = V->getType();
5717 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5718 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5719 LLVMContext &Context = V->getContext();
5720
5721 // We have no information about target data layout, so we assume that
5722 // the maximum pointer size is 64 bits.
5723 Type *MidTy = Type::getInt64Ty(Context);
5724 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5725
5726 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5727 }
5728
5729 return nullptr;
5730}
5731
5732Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5733 if (Opc != Instruction::BitCast)
5734 return nullptr;
5735
5736 Type *SrcTy = C->getType();
5737 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5738 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5739 LLVMContext &Context = C->getContext();
5740
5741 // We have no information about target data layout, so we assume that
5742 // the maximum pointer size is 64 bits.
5743 Type *MidTy = Type::getInt64Ty(Context);
5744
5745 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
5746 DestTy);
5747 }
5748
5749 return nullptr;
5750}
5751
5752/// Check the debug info version number, if it is out-dated, drop the debug
5753/// info. Return true if module is modified.
5754bool llvm::UpgradeDebugInfo(Module &M) {
5755 if (DisableAutoUpgradeDebugInfo)
5756 return false;
5757
5758 llvm::TimeTraceScope timeScope("Upgrade debug info");
5759 // We need to get metadata before the module is verified (i.e., getModuleFlag
5760 // makes assumptions that we haven't verified yet). Carefully extract the flag
5761 // from the metadata.
5762 unsigned Version = 0;
5763 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5764 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5765 if (Flag->getNumOperands() < 3)
5766 return false;
5767 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5768 return K->getString() == "Debug Info Version";
5769 return false;
5770 });
5771 if (OpIt != ModFlags->op_end()) {
5772 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5773 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5774 Version = CI->getZExtValue();
5775 }
5776 }
5777
5778 if (Version == DEBUG_METADATA_VERSION) {
5779 bool BrokenDebugInfo = false;
5780 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5781 report_fatal_error("Broken module found, compilation aborted!");
5782 if (!BrokenDebugInfo)
5783 // Everything is ok.
5784 return false;
5785 else {
5786 // Diagnose malformed debug info.
5787 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5788 M.getContext().diagnose(Diag);
5789 }
5790 }
5791 bool Modified = StripDebugInfo(M);
5792 if (Modified && Version != DEBUG_METADATA_VERSION) {
5793 // Diagnose a version mismatch.
5794 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5795 M.getContext().diagnose(DiagVersion);
5796 }
5797 return Modified;
5798}
5799
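// Merge one dimension of a launch-bounds style annotation (e.g. !"maxntidx")
// into the comma-separated "x[,y[,z]]" function-attribute form, preserving any
// dimensions already present.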
5800static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5801 GlobalValue *GV, const Metadata *V) {
5802 Function *F = cast<Function>(GV);
5803
5804 constexpr StringLiteral DefaultValue = "1";
5805 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5806 unsigned Length = 0;
5807
5808 if (F->hasFnAttribute(Attr)) {
5809 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5810 // parse these elements, placing them into Vect3.
5811 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5812 for (; Length < 3 && !S.empty(); Length++) {
5813 auto [Part, Rest] = S.split(',');
5814 Vect3[Length] = Part.trim();
5815 S = Rest;
5816 }
5817 }
5818
5819 const unsigned Dim = DimC - 'x';
5820 assert(Dim < 3 && "Unexpected dim char");
5821
5822 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5823
5824 // Local variable required for the StringRef in Vect3 to point to.
5825 const std::string VStr = llvm::utostr(VInt);
5826 Vect3[Dim] = VStr;
5827 Length = std::max(Length, Dim + 1);
5828
5829 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5830 F->addFnAttr(Attr, NewAttr);
5831}
5832
5833static inline bool isXYZ(StringRef S) {
5834 return S == "x" || S == "y" || S == "z";
5835}
5836
5837static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5838 const Metadata *V) {
5839 if (K == "kernel") {
5840 if (!mdconst::extract<ConstantInt>(V)->isZero())
5841 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5842 return true;
5843 }
5844 if (K == "align") {
5845 // V is a bitfield specifying two 16-bit values. The alignment value is
5846 // specified in the low 16 bits; the index is specified in the high bits. For
5847 // the index, 0 indicates the return value while higher values correspond to
5848 // each parameter (idx = param + 1).
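 // For example, a value of 0x00010010 encodes index 1 (the first
 // parameter) with a stack alignment of 16.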
5849 const uint64_t AlignIdxValuePair =
5850 mdconst::extract<ConstantInt>(V)->getZExtValue();
5851 const unsigned Idx = (AlignIdxValuePair >> 16);
5852 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5853 cast<Function>(GV)->addAttributeAtIndex(
5854 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5855 return true;
5856 }
5857 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5858 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5859 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5860 return true;
5861 }
5862 if (K == "minctasm") {
5863 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5864 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5865 return true;
5866 }
5867 if (K == "maxnreg") {
5868 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5869 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5870 return true;
5871 }
5872 if (K.consume_front("maxntid") && isXYZ(K)) {
5873 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5874 return true;
5875 }
5876 if (K.consume_front("reqntid") && isXYZ(K)) {
5877 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5878 return true;
5879 }
5880 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5881 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5882 return true;
5883 }
5884 if (K == "grid_constant") {
5885 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5886 for (const auto &Op : cast<MDNode>(V)->operands()) {
5887 // For some reason, the index is 1-based in the metadata. Good thing we're
5888 // able to auto-upgrade it!
5889 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5890 cast<Function>(GV)->addParamAttr(Index, Attr);
5891 }
5892 return true;
5893 }
5894
5895 return false;
5896}
5897
5898void llvm::UpgradeNVVMAnnotations(Module &M) {
5899 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5900 if (!NamedMD)
5901 return;
5902
5903 SmallVector<MDNode *, 8> NewNodes;
5904 SmallPtrSet<const MDNode *, 8> SeenNodes;
5905 for (MDNode *MD : NamedMD->operands()) {
5906 if (!SeenNodes.insert(MD).second)
5907 continue;
5908
5909 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5910 if (!GV)
5911 continue;
5912
5913 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5914
5915 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5916 // Each nvvm.annotations metadata entry will be of the following form:
5917 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5918 // start index = 1, to skip the global variable key
5919 // increment = 2, to skip the value for each property-value pairs
5920 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5921 MDString *K = cast<MDString>(MD->getOperand(j));
5922 const MDOperand &V = MD->getOperand(j + 1);
5923 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5924 if (!Upgraded)
5925 NewOperands.append({K, V});
5926 }
5927
5928 if (NewOperands.size() > 1)
5929 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5930 }
5931
5932 NamedMD->clearOperands();
5933 for (MDNode *N : NewNodes)
5934 NamedMD->addOperand(N);
5935}
5936
5937/// This checks for the objc retain/release marker, which should be upgraded.
5938/// It returns true if the module is modified.
5939static bool upgradeRetainReleaseMarker(Module &M) {
5940 bool Changed = false;
5941 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5942 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5943 if (ModRetainReleaseMarker) {
5944 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5945 if (Op) {
5946 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5947 if (ID) {
5948 SmallVector<StringRef, 4> ValueComp;
5949 ID->getString().split(ValueComp, "#");
5950 if (ValueComp.size() == 2) {
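// e.g. a legacy two-part marker string "A#B" is canonicalized to "A;B".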
5951 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5952 ID = MDString::get(M.getContext(), NewValue);
5953 }
5954 M.addModuleFlag(Module::Error, MarkerKey, ID);
5955 M.eraseNamedMetadata(ModRetainReleaseMarker);
5956 Changed = true;
5957 }
5958 }
5959 }
5960 return Changed;
5961}
5962
5963void llvm::UpgradeARCRuntime(Module &M) {
5964 // This lambda converts normal function calls to ARC runtime functions to
5965 // intrinsic calls.
5966 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5967 llvm::Intrinsic::ID IntrinsicFunc) {
5968 Function *Fn = M.getFunction(OldFunc);
5969
5970 if (!Fn)
5971 return;
5972
5973 Function *NewFn =
5974 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5975
5976 for (User *U : make_early_inc_range(Fn->users())) {
5977 CallInst *CI = dyn_cast<CallInst>(U);
5978 if (!CI || CI->getCalledFunction() != Fn)
5979 continue;
5980
5981 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5982 FunctionType *NewFuncTy = NewFn->getFunctionType();
5983 SmallVector<Value *, 4> Args;
5984
5985 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5986 // value to the return type of the old function.
5987 if (NewFuncTy->getReturnType() != CI->getType() &&
5988 !CastInst::castIsValid(Instruction::BitCast, CI,
5989 NewFuncTy->getReturnType()))
5990 continue;
5991
5992 bool InvalidCast = false;
5993
5994 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5995 Value *Arg = CI->getArgOperand(I);
5996
5997 // Bitcast argument to the parameter type of the new function if it's
5998 // not a variadic argument.
5999 if (I < NewFuncTy->getNumParams()) {
6000 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
6001 // to the parameter type of the new function.
6002 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
6003 NewFuncTy->getParamType(I))) {
6004 InvalidCast = true;
6005 break;
6006 }
6007 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
6008 }
6009 Args.push_back(Arg);
6010 }
6011
6012 if (InvalidCast)
6013 continue;
6014
6015 // Create a call instruction that calls the new function.
6016 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6017 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
6018 NewCall->takeName(CI);
6019
6020 // Bitcast the return value back to the type of the old call.
6021 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
6022
6023 if (!CI->use_empty())
6024 CI->replaceAllUsesWith(NewRetVal);
6025 CI->eraseFromParent();
6026 }
6027
6028 if (Fn->use_empty())
6029 Fn->eraseFromParent();
6030 };
6031
6032 // Unconditionally convert a call to "clang.arc.use" to a call to
6033 // "llvm.objc.clang.arc.use".
6034 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6035
6036 // Upgrade the retain release marker. If there is no need to upgrade
6037 // the marker, that means either the module is already new enough to contain
6038 // new intrinsics or it is not ARC. There is no need to upgrade runtime calls.
6039 if (!upgradeRetainReleaseMarker(M))
6040 return;
6041
6042 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6043 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6044 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6045 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6046 {"objc_autoreleaseReturnValue",
6047 llvm::Intrinsic::objc_autoreleaseReturnValue},
6048 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6049 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6050 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6051 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6052 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6053 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6054 {"objc_release", llvm::Intrinsic::objc_release},
6055 {"objc_retain", llvm::Intrinsic::objc_retain},
6056 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6057 {"objc_retainAutoreleaseReturnValue",
6058 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6059 {"objc_retainAutoreleasedReturnValue",
6060 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6061 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6062 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6063 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6064 {"objc_unsafeClaimAutoreleasedReturnValue",
6065 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6066 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6067 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6068 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6069 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6070 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6071 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6072 {"objc_arc_annotation_topdown_bbstart",
6073 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6074 {"objc_arc_annotation_topdown_bbend",
6075 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6076 {"objc_arc_annotation_bottomup_bbstart",
6077 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6078 {"objc_arc_annotation_bottomup_bbend",
6079 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6080
6081 for (auto &I : RuntimeFuncs)
6082 UpgradeToIntrinsic(I.first, I.second);
6083}
6084
6085bool llvm::UpgradeModuleFlags(Module &M) {
6086 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6087 if (!ModFlags)
6088 return false;
6089
6090 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6091 bool HasSwiftVersionFlag = false;
6092 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6093 uint32_t SwiftABIVersion;
6094 auto Int8Ty = Type::getInt8Ty(M.getContext());
6095 auto Int32Ty = Type::getInt32Ty(M.getContext());
6096
6097 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6098 MDNode *Op = ModFlags->getOperand(I);
6099 if (Op->getNumOperands() != 3)
6100 continue;
6101 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6102 if (!ID)
6103 continue;
6104 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6105 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6106 Type::getInt32Ty(M.getContext()), B)),
6107 MDString::get(M.getContext(), ID->getString()),
6108 Op->getOperand(2)};
6109 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6110 Changed = true;
6111 };
6112
6113 if (ID->getString() == "Objective-C Image Info Version")
6114 HasObjCFlag = true;
6115 if (ID->getString() == "Objective-C Class Properties")
6116 HasClassProperties = true;
6117 // Upgrade PIC from Error/Max to Min.
6118 if (ID->getString() == "PIC Level") {
6119 if (auto *Behavior =
6120 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
6121 uint64_t V = Behavior->getLimitedValue();
6122 if (V == Module::Error || V == Module::Max)
6123 SetBehavior(Module::Min);
6124 }
6125 }
6126 // Upgrade "PIE Level" from Error to Max.
6127 if (ID->getString() == "PIE Level")
6128 if (auto *Behavior =
6129 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
6130 if (Behavior->getLimitedValue() == Module::Error)
6131 SetBehavior(Module::Max);
6132
6133 // Upgrade branch protection and return address signing module flags. The
6134 // module flag behavior for these fields was Error; now it is Min.
6135 if (ID->getString() == "branch-target-enforcement" ||
6136 ID->getString().starts_with("sign-return-address")) {
6137 if (auto *Behavior =
6138 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
6139 if (Behavior->getLimitedValue() == Module::Error) {
6140 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6141 Metadata *Ops[3] = {
6142 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6143 Op->getOperand(1), Op->getOperand(2)};
6144 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6145 Changed = true;
6146 }
6147 }
6148 }
6149
6150 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
6151 // section name so that llvm-lto will not complain about mismatching
6152 // module flags that are functionally the same.
6153 if (ID->getString() == "Objective-C Image Info Section") {
6154 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6155 SmallVector<StringRef, 4> ValueComp;
6156 Value->getString().split(ValueComp, " ");
6157 if (ValueComp.size() != 1) {
6158 std::string NewValue;
6159 for (auto &S : ValueComp)
6160 NewValue += S.str();
6161 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6162 MDString::get(M.getContext(), NewValue)};
6163 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6164 Changed = true;
6165 }
6166 }
6167 }
6168
6169 // The IR upgrader turns an i32 "Objective-C Garbage Collection" flag into
6170 // an i8 value. If the higher bits are set, it adds new module flags for the Swift info.
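// For example, a legacy value of 0x05090102 unpacks to Swift major version 5,
// minor version 9, Swift ABI version 1, and a residual i8 GC value of 2.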
6171 if (ID->getString() == "Objective-C Garbage Collection") {
6172 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6173 if (Md) {
6174 assert(Md->getValue() && "Expected non-empty metadata");
6175 auto Type = Md->getValue()->getType();
6176 if (Type == Int8Ty)
6177 continue;
6178 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6179 if ((Val & 0xff) != Val) {
6180 HasSwiftVersionFlag = true;
6181 SwiftABIVersion = (Val & 0xff00) >> 8;
6182 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6183 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6184 }
6185 Metadata *Ops[3] = {
6186 Op->getOperand(0),
6187 Op->getOperand(1),
6188 ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
6189 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6190 Changed = true;
6191 }
6192 }
6193
6194 if (ID->getString() == "amdgpu_code_object_version") {
6195 Metadata *Ops[3] = {
6196 Op->getOperand(0),
6197 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6198 Op->getOperand(2)};
6199 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6200 Changed = true;
6201 }
6202 }
6203
6204 // "Objective-C Class Properties" is recently added for Objective-C. We
6205 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
6206 // flag of value 0, so we can correclty downgrade this flag when trying to
6207 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6208 // this module flag.
6209 if (HasObjCFlag && !HasClassProperties) {
6210 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6211 (uint32_t)0);
6212 Changed = true;
6213 }
6214
6215 if (HasSwiftVersionFlag) {
6216 M.addModuleFlag(Module::Error, "Swift ABI Version",
6217 SwiftABIVersion);
6218 M.addModuleFlag(Module::Error, "Swift Major Version",
6219 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6220 M.addModuleFlag(Module::Error, "Swift Minor Version",
6221 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6222 Changed = true;
6223 }
6224
6225 return Changed;
6226}
6227
6228void llvm::UpgradeSectionAttributes(Module &M) {
6229 auto TrimSpaces = [](StringRef Section) -> std::string {
6230 SmallVector<StringRef, 5> Components;
6231 Section.split(Components, ',');
6232
6233 SmallString<32> Buffer;
6234 raw_svector_ostream OS(Buffer);
6235
6236 for (auto Component : Components)
6237 OS << ',' << Component.trim();
6238
6239 return std::string(OS.str().substr(1));
6240 };
6241
6242 for (auto &GV : M.globals()) {
6243 if (!GV.hasSection())
6244 continue;
6245
6246 StringRef Section = GV.getSection();
6247
6248 if (!Section.starts_with("__DATA, __objc_catlist"))
6249 continue;
6250
6251 // __DATA, __objc_catlist, regular, no_dead_strip
6252 // __DATA,__objc_catlist,regular,no_dead_strip
6253 GV.setSection(TrimSpaces(Section));
6254 }
6255}
6256
6257namespace {
6258// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6259// callsites within a function that did not also have the strictfp attribute.
6260// Since 10.0, if strict FP semantics are needed within a function, the
6261// function must have the strictfp attribute and all calls within the function
6262// must also have the strictfp attribute. This latter restriction is
6263// necessary to prevent unwanted libcall simplification when a function is
6264// being cloned (such as for inlining).
6265//
6266// The "dangling" strictfp attribute usage was only used to prevent constant
6267// folding and other libcall simplification. The nobuiltin attribute on the
6268// callsite has the same effect.
6269struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6270 StrictFPUpgradeVisitor() = default;
6271
6272 void visitCallBase(CallBase &Call) {
6273 if (!Call.isStrictFP())
6274 return;
6275 if (isa<ConstrainedFPIntrinsic>(&Call))
6276 return;
6277 // If we get here, the caller doesn't have the strictfp attribute
6278 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6279 Call.removeFnAttr(Attribute::StrictFP);
6280 Call.addFnAttr(Attribute::NoBuiltin);
6281 }
6282};
6283
6284/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6285struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6286 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6287 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6288
6289 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6290 if (!RMW.isFloatingPointOperation())
6291 return;
6292
6293 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6294 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6295 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6296 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6297 }
6298};
6299} // namespace
6300
6301void llvm::UpgradeFunctionAttributes(Function &F) {
6302 // If a function definition doesn't have the strictfp attribute,
6303 // convert any callsite strictfp attributes to nobuiltin.
6304 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6305 StrictFPUpgradeVisitor SFPV;
6306 SFPV.visit(F);
6307 }
6308
6309 // Remove all incompatible attributes from the function.
6310 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6311 F.getReturnType(), F.getAttributes().getRetAttrs()));
6312 for (auto &Arg : F.args())
6313 Arg.removeAttrs(
6314 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6315
6316 // Older versions of LLVM treated an "implicit-section-name" attribute
6317 // similarly to directly setting the section on a Function.
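// For example, a function that carried "implicit-section-name"="__foo" (an
// illustrative name) now has its section set to "__foo" directly, and the
// attribute is removed.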
6318 if (Attribute A = F.getFnAttribute("implicit-section-name");
6319 A.isValid() && A.isStringAttribute()) {
6320 F.setSection(A.getValueAsString());
6321 F.removeFnAttr("implicit-section-name");
6322 }
6323
6324 if (!F.empty()) {
6325 // For some reason this is called twice, and the first time is before any
6326 // instructions are loaded into the body.
6327
6328 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6329 A.isValid()) {
6330
6331 if (A.getValueAsBool()) {
6332 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6333 Visitor.visit(F);
6334 }
6335
6336 // We will leave behind dead attribute uses on external declarations, but
6337 // clang never added these to declarations anyway.
6338 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
6339 }
6340 }
6341}
6342
6343 // Set the function attribute if it is not already present.
6344static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6345 StringRef Value) {
6346 if (!F.hasFnAttribute(FnAttrName))
6347 F.addFnAttr(FnAttrName, Value);
6348}
6349
6350 // Set the function attribute if it is not already present. If the attribute
6351 // is present with value "false", remove it; if it is present with value
6352 // "true", reset it to a valueless attribute.
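// For example, "branch-target-enforcement"="false" is removed outright, while
// "branch-target-enforcement"="true" is replaced by the valueless form of the
// attribute.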
6353static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6354 if (!F.hasFnAttribute(FnAttrName)) {
6355 if (Set)
6356 F.addFnAttr(FnAttrName);
6357 } else {
6358 auto A = F.getFnAttribute(FnAttrName);
6359 if ("false" == A.getValueAsString())
6360 F.removeFnAttr(FnAttrName);
6361 else if ("true" == A.getValueAsString()) {
6362 F.removeFnAttr(FnAttrName);
6363 F.addFnAttr(FnAttrName);
6364 }
6365 }
6366}
6367
6368void llvm::copyModuleAttrToFunctions(Module &M) {
6369 Triple T(M.getTargetTriple());
6370 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6371 return;
6372
6373 uint64_t BTEValue = 0;
6374 uint64_t BPPLRValue = 0;
6375 uint64_t GCSValue = 0;
6376 uint64_t SRAValue = 0;
6377 uint64_t SRAALLValue = 0;
6378 uint64_t SRABKeyValue = 0;
6379
6380 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6381 if (ModFlags) {
6382 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6383 MDNode *Op = ModFlags->getOperand(I);
6384 if (Op->getNumOperands() != 3)
6385 continue;
6386
6387 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6388 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6389 if (!ID || !CI)
6390 continue;
6391
6392 StringRef IDStr = ID->getString();
6393 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6394 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6395 : IDStr == "guarded-control-stack" ? &GCSValue
6396 : IDStr == "sign-return-address" ? &SRAValue
6397 : IDStr == "sign-return-address-all" ? &SRAALLValue
6398 : IDStr == "sign-return-address-with-bkey"
6399 ? &SRABKeyValue
6400 : nullptr;
6401 if (!ValPtr)
6402 continue;
6403
6404 *ValPtr = CI->getZExtValue();
6405 if (*ValPtr == 2)
6406 return;
6407 }
6408 }
6409
6410 bool BTE = BTEValue == 1;
6411 bool BPPLR = BPPLRValue == 1;
6412 bool GCS = GCSValue == 1;
6413 bool SRA = SRAValue == 1;
6414
6415 StringRef SignTypeValue = "non-leaf";
6416 if (SRA && SRAALLValue == 1)
6417 SignTypeValue = "all";
6418
6419 StringRef SignKeyValue = "a_key";
6420 if (SRA && SRABKeyValue == 1)
6421 SignKeyValue = "b_key";
6422
6423 for (Function &F : M.getFunctionList()) {
6424 if (F.isDeclaration())
6425 continue;
6426
6427 if (SRA) {
6428 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6429 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6430 } else {
6431 if (auto A = F.getFnAttribute("sign-return-address");
6432 A.isValid() && "none" == A.getValueAsString()) {
6433 F.removeFnAttr("sign-return-address");
6434 F.removeFnAttr("sign-return-address-key");
6435 }
6436 }
6437 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6438 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6439 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6440 }
6441
6442 if (BTE)
6443 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6444 if (BPPLR)
6445 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6446 if (GCS)
6447 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6448 if (SRA) {
6449 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6450 if (SRAALLValue == 1)
6451 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6452 if (SRABKeyValue == 1)
6453 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6454 }
6455}
6456
6457static bool isOldLoopArgument(Metadata *MD) {
6458 auto *T = dyn_cast_or_null<MDTuple>(MD);
6459 if (!T)
6460 return false;
6461 if (T->getNumOperands() < 1)
6462 return false;
6463 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6464 if (!S)
6465 return false;
6466 return S->getString().starts_with("llvm.vectorizer.");
6467}
6468
6469static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6470 StringRef OldPrefix = "llvm.vectorizer.";
6471 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6472
6473 if (OldTag == "llvm.vectorizer.unroll")
6474 return MDString::get(C, "llvm.loop.interleave.count");
6475
6476 return MDString::get(
6477 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6478 .str());
6479}
6480
6481static Metadata *upgradeLoopArgument(Metadata *MD) {
6482 auto *T = dyn_cast_or_null<MDTuple>(MD);
6483 if (!T)
6484 return MD;
6485 if (T->getNumOperands() < 1)
6486 return MD;
6487 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6488 if (!OldTag)
6489 return MD;
6490 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6491 return MD;
6492
6493 // This has an old tag. Upgrade it.
6494 SmallVector<Metadata *, 4> Ops;
6495 Ops.reserve(T->getNumOperands());
6496 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6497 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6498 Ops.push_back(T->getOperand(I));
6499
6500 return MDTuple::get(T->getContext(), Ops);
6501}
6502
6503MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6504 auto *T = dyn_cast<MDTuple>(&N);
6505 if (!T)
6506 return &N;
6507
6508 if (none_of(T->operands(), isOldLoopArgument))
6509 return &N;
6510
6511 SmallVector<Metadata *, 4> Ops;
6512 Ops.reserve(T->getNumOperands());
6513 for (Metadata *MD : T->operands())
6514 Ops.push_back(upgradeLoopArgument(MD));
6515
6516 return MDTuple::get(T->getContext(), Ops);
6517}
6518
6519std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
6520 Triple T(TT);
6521 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6522 // the address space of globals to 1. This does not apply to SPIRV Logical.
6523 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6524 !DL.contains("-G") && !DL.starts_with("G")) {
6525 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6526 }
6527
6528 if (T.isLoongArch64() || T.isRISCV64()) {
6529 // Make i32 a native type for 64-bit LoongArch and RISC-V.
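// e.g. "e-m:e-p:64:64-i64:64-n64-S128" becomes "e-m:e-p:64:64-i64:64-n32:64-S128".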
6530 auto I = DL.find("-n64-");
6531 if (I != StringRef::npos)
6532 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6533 return DL.str();
6534 }
6535
6536 // AMDGPU data layout upgrades.
6537 std::string Res = DL.str();
6538 if (T.isAMDGPU()) {
6539 // Define address spaces for constants.
6540 if (!DL.contains("-G") && !DL.starts_with("G"))
6541 Res.append(Res.empty() ? "G1" : "-G1");
6542
6543 // AMDGCN data layout upgrades.
6544 if (T.isAMDGCN()) {
6545
6546 // Add missing non-integral declarations.
6547 // This goes before adding new address spaces to prevent incoherent string
6548 // values.
6549 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6550 Res.append("-ni:7:8:9");
6551 // Update ni:7 to ni:7:8:9.
6552 if (DL.ends_with("ni:7"))
6553 Res.append(":8:9");
6554 if (DL.ends_with("ni:7:8"))
6555 Res.append(":9");
6556
6557 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6558 // resources). An empty data layout has already been upgraded to G1 by now.
6559 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6560 Res.append("-p7:160:256:256:32");
6561 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6562 Res.append("-p8:128:128:128:48");
6563 constexpr StringRef OldP8("-p8:128:128-");
6564 if (DL.contains(OldP8))
6565 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6566 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6567 Res.append("-p9:192:256:256:32");
6568 }
6569
6570 // Upgrade the ELF mangling mode.
6571 if (!DL.contains("m:e"))
6572 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6573
6574 return Res;
6575 }
6576
6577 if (T.isSystemZ() && !DL.empty()) {
6578 // Make sure the stack alignment is present.
6579 if (!DL.contains("-S64"))
6580 return "E-S64" + DL.drop_front(1).str();
6581 return DL.str();
6582 }
6583
6584 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6585 // If the datalayout matches the expected format, add pointer size address
6586 // spaces to the datalayout.
6587 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6588 if (!DL.contains(AddrSpaces)) {
6589 SmallVector<StringRef, 4> Groups;
6590 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6591 if (R.match(Res, &Groups))
6592 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6593 }
6594 };
6595
6596 // AArch64 data layout upgrades.
6597 if (T.isAArch64()) {
6598 // Add "-Fn32"
6599 if (!DL.empty() && !DL.contains("-Fn32"))
6600 Res.append("-Fn32");
6601 AddPtr32Ptr64AddrSpaces();
6602 return Res;
6603 }
6604
6605 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6606 T.isWasm()) {
6607 // Mips64 with o32 ABI did not add "-i128:128".
6608 // Add "-i128:128"
6609 std::string I64 = "-i64:64";
6610 std::string I128 = "-i128:128";
6611 if (!StringRef(Res).contains(I128)) {
6612 size_t Pos = Res.find(I64);
6613 if (Pos != size_t(-1))
6614 Res.insert(Pos + I64.size(), I128);
6615 }
6616 }
6617
6618 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6619 size_t Pos = Res.find("-S128");
6620 if (Pos == StringRef::npos)
6621 Pos = Res.size();
6622 Res.insert(Pos, "-f64:32:64");
6623 }
6624
6625 if (!T.isX86())
6626 return Res;
6627
6628 AddPtr32Ptr64AddrSpaces();
6629
6630 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6631 // for i128 operations prior to this being reflected in the data layout, and
6632 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6633 // boundaries, so although this is a breaking change, the upgrade is expected
6634 // to fix more IR than it breaks.
6635 // Intel MCU is an exception and uses 4-byte-alignment.
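// For example, "e-m:e-i64:64-f80:128-n8:16:32:64-S128" is rewritten to
// "e-m:e-i64:64-i128:128-f80:128-n8:16:32:64-S128".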
6636 if (!T.isOSIAMCU()) {
6637 std::string I128 = "-i128:128";
6638 if (StringRef Ref = Res; !Ref.contains(I128)) {
6639 SmallVector<StringRef, 4> Groups;
6640 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6641 if (R.match(Res, &Groups))
6642 Res = (Groups[1] + I128 + Groups[3]).str();
6643 }
6644 }
6645
6646 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6647 // Raising the alignment is safe because Clang did not produce f80 values in
6648 // the MSVC environment before this upgrade was added.
6649 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6650 StringRef Ref = Res;
6651 auto I = Ref.find("-f80:32-");
6652 if (I != StringRef::npos)
6653 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6654 }
6655
6656 return Res;
6657}
6658
6659void llvm::UpgradeAttributes(AttrBuilder &B) {
6660 StringRef FramePointer;
6661 Attribute A = B.getAttribute("no-frame-pointer-elim");
6662 if (A.isValid()) {
6663 // The value can be "true" or "false".
6664 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6665 B.removeAttribute("no-frame-pointer-elim");
6666 }
6667 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6668 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6669 if (FramePointer != "all")
6670 FramePointer = "non-leaf";
6671 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6672 }
6673 if (!FramePointer.empty())
6674 B.addAttribute("frame-pointer", FramePointer);
6675
6676 A = B.getAttribute("null-pointer-is-valid");
6677 if (A.isValid()) {
6678 // The value can be "true" or "false".
6679 bool NullPointerIsValid = A.getValueAsString() == "true";
6680 B.removeAttribute("null-pointer-is-valid");
6681 if (NullPointerIsValid)
6682 B.addAttribute(Attribute::NullPointerIsValid);
6683 }
6684}
6685
6686void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6687 // clang.arc.attachedcall bundles are now required to have an operand.
6688 // If they don't, it's okay to drop them entirely: when there is an operand,
6689 // the "attachedcall" is meaningful and required, but without an operand,
6690 // it's just a marker NOP. Dropping it merely prevents an optimization.
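// For example, an operand-less bundle [ "clang.arc.attachedcall"() ] is erased,
// while [ "clang.arc.attachedcall"(ptr @llvm.objc.retainAutoreleasedReturnValue) ]
// is kept.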
6691 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6692 return OBD.getTag() == "clang.arc.attachedcall" &&
6693 OBD.inputs().empty();
6694 });
6695}