//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
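// As a hedged illustration (IR shapes inferred from the check below, not
// quoted from a spec), this turns a stale declaration such as
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// into the current form
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)
// after the old function is renamed with a ".old" suffix.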
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
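// As a hedged illustration (the concrete intrinsic and vector widths are an
// assumption for exposition), this maps a declaration such as
//   declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32)
// onto the current form taking an i8 immediate:
//   declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8)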
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
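// As a hedged illustration (exact operand lists are an assumption), a
// scalar-returning declaration such as
//   declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>,
//                                               i32, i8)
// is re-declared with a vXi1 mask result:
//   declare <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>,
//                                                     <2 x double>, i32,
//                                                     <2 x i1>)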
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of multiply and add bytes intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i8.
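// As a hedged illustration (operand shapes inferred from the i8-vector check
// below), an old declaration such as
//   declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>,
//                                                   <4 x i32>)
// becomes
//   declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <16 x i8>,
//                                                   <16 x i8>)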
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are vectors of i8.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of multiply and add words intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i16.
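// As a hedged illustration (operand shapes inferred from the i16-vector check
// below), an old declaration such as
//   declare <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <4 x i32>,
//                                                   <4 x i32>)
// becomes
//   declare <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <8 x i16>,
//                                                   <8 x i16>)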
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are vectors of i16.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      // Added in 7.0
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MaskedFPCompare(F, ID, NewFn);
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.starts_with("vpdpwssd.") ||
               Name.starts_with("vpdpwssds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
               .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
               .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
               .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
               .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
               .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx512.*'.
  }

  if (Name.consume_front("avx2.")) {
    if (Name.consume_front("vpdpb")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
               .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
               .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
               .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.consume_front("vpdpw")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
               .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
               .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
               .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
               .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx2.*'
  }

  if (Name.consume_front("avx10.")) {
    if (Name.consume_front("vpdpb")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
               .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.consume_front("vpdpw")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
               .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
               .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx10.*'
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic functions. Return true
// iff so. IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
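    // As a hedged illustration (suffix spelling assumed from the .Cases
    // below), a pre-12.0 declaration such as
    //   declare <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(
    //       <2 x float>, <8 x i8>, <8 x i8>)
    // is re-declared to take bfloat operands:
    //   declare <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v4bf16(
    //       <2 x float>, <4 x bfloat>, <4 x bfloat>)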
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases({"v2f32.v8i8", "v4f32.v16i8"},
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
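    // As a hedged illustration, a pre-12.0 polymorphic declaration such as
    //   declare <4 x float> @llvm.aarch64.neon.bfmmla.v4f32.v16i8(
    //       <4 x float>, <16 x i8>, <16 x i8>)
    // maps onto the fixed-type, unsuffixed form
    //   declare <4 x float> @llvm.aarch64.neon.bfmmla(
    //       <4 x float>, <8 x bfloat>, <8 x bfloat>)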
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" ||
               Name == "2qa" || Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
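      // As a hedged illustration, a call such as
      //   %r = call bfloat @llvm.aarch64.neon.bfcvt(float %x)
      // is rewritten by the call upgrade into the plain conversion
      //   %r = fptrunc float %x to bfloat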
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'],
                                                    Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }

      if (Name.starts_with("rev.nxv")) {
        // 'aarch64.sve.rev.<Ty>'
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
        return true;
      }

      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    //     (and we upgrade it to use shared_cluster address-space[AS=7])
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    //     The last three parameters of the older version of these
    //     intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    //     The newer version reads as:
    //     arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    //     So, when the type of the [N-3]rd argument is "not i1", then
    //     it is the older version and we need to upgrade.
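    //
    // As a hedged illustration (argument names made up for exposition), an
    // older call tail of the form
    //   ..., i64 %ch, i1 %mc_flag, i1 %ch_flag
    // is extended by the call upgrade to the newer
    //   ..., i64 %ch, i1 %mc_flag, i1 %ch_flag, i32 %cta_group_flag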
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec") ||
            Name.starts_with("cond.sub") || Name.starts_with("csub")) {
          // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
          // and usub_sat so there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      // Legacy wmma iu intrinsics without the optional clamp operand.
      if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
          F->arg_size() == 7) {
        NewFn = nullptr;
        return true;
      }
      if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
          F->arg_size() == 8) {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .StartsWith("partial.reduce.add",
                          Intrinsic::vector_partial_reduce_add)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert ||
            ID == Intrinsic::vector_partial_reduce_add)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }

      if (Name.consume_front("splice"))
        return true;
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group.
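      // As a hedged illustration (pointer-type spelling may differ in older
      // bitcode), a declaration such as
      //   declare ptr @llvm.invariant.group.barrier.p0(ptr)
      // is re-created as
      //   declare ptr @llvm.launder.invariant.group.p0(ptr)
      // with the parameter type preserved as the overload.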
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'l':
    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      Intrinsic::ID IID = Name.starts_with("lifetime.start")
                              ? Intrinsic::lifetime_start
                              : Intrinsic::lifetime_end;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                F->getArg(0)->getType());
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to instead embed the alignment as an attribute of
    // the pointer args.
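    // As a hedged illustration (pointer mangling shown in opaque-pointer
    // style; historic bitcode used p0i8), the five-operand form
    //   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i32 4,
    //                                    i1 false)
    // becomes the four-operand form carrying align attributes:
    //   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %d, ptr align 4 %s,
    //                                    i64 %n, i1 false)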
1491 if (unsigned ID = StringSwitch<unsigned>(Name)
1492 .StartsWith("memcpy.", Intrinsic::memcpy)
1493 .StartsWith("memmove.", Intrinsic::memmove)
1494 .Default(0)) {
1495 if (F->arg_size() == 5) {
1496 rename(F);
1497 // Get the types of dest, src, and len
1498 ArrayRef<Type *> ParamTypes =
1499 F->getFunctionType()->params().slice(0, 3);
1500 NewFn =
1501 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1502 return true;
1503 }
1504 }
1505 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1506 rename(F);
1507 // Get the types of dest, and len
1508 const auto *FT = F->getFunctionType();
1509 Type *ParamTypes[2] = {
1510 FT->getParamType(0), // Dest
1511 FT->getParamType(2) // len
1512 };
1513 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1514 Intrinsic::memset, ParamTypes);
1515 return true;
1516 }
1517
1518 unsigned MaskedID =
1520 .StartsWith("masked.load", Intrinsic::masked_load)
1521 .StartsWith("masked.gather", Intrinsic::masked_gather)
1522 .StartsWith("masked.store", Intrinsic::masked_store)
1523 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1524 .Default(0);
1525 if (MaskedID && F->arg_size() == 4) {
1526 rename(F);
1527 if (MaskedID == Intrinsic::masked_load ||
1528 MaskedID == Intrinsic::masked_gather) {
1530 F->getParent(), MaskedID,
1531 {F->getReturnType(), F->getArg(0)->getType()});
1532 return true;
1533 }
1535 F->getParent(), MaskedID,
1536 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1537 return true;
1538 }
1539 break;
1540 }
1541 case 'n': {
1542 if (Name.consume_front("nvvm.")) {
1543 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1544 if (F->arg_size() == 1) {
1545 Intrinsic::ID IID =
1547 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1548 .Case("clz.i", Intrinsic::ctlz)
1549 .Case("popc.i", Intrinsic::ctpop)
1551 if (IID != Intrinsic::not_intrinsic) {
1552 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1553 {F->getReturnType()});
1554 return true;
1555 }
1556 } else if (F->arg_size() == 2) {
1557 Intrinsic::ID IID =
1559 .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
1560 .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
1561 .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
1562 .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
1564 if (IID != Intrinsic::not_intrinsic) {
1565 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1566 {F->getReturnType()});
1567 return true;
1568 }
1569 }
1570
1571 // Check for nvvm intrinsics that need a return type adjustment.
1572 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1574 if (IID != Intrinsic::not_intrinsic) {
1575 NewFn = nullptr;
1576 return true;
1577 }
1578 }
1579
1580 // Upgrade Distributed Shared Memory Intrinsics
1582 if (IID != Intrinsic::not_intrinsic) {
1583 rename(F);
1584 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1585 return true;
1586 }
1587
1588 // Upgrade TMA copy G2S Intrinsics
1590 if (IID != Intrinsic::not_intrinsic) {
1591 rename(F);
1592 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1593 return true;
1594 }
1595
1596 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1597 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1598 //
1599 // TODO: We could add lohi.i2d.
1600 bool Expand = false;
1601 if (Name.consume_front("abs."))
1602 // nvvm.abs.{i,ll,bf16,bf16x2}
1603 Expand =
1604 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1605 else if (Name.consume_front("fabs."))
1606 // nvvm.fabs.{f,ftz.f,d}
1607 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1608 else if (Name.consume_front("ex2.approx."))
1609 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1610 Expand =
1611 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1612 else if (Name.consume_front("atomic.load."))
1613 // nvvm.atomic.load.add.{f32,f64}.p
1614 // nvvm.atomic.load.{inc,dec}.32.p
1615 Expand = StringSwitch<bool>(Name)
1616 .StartsWith("add.f32.p", true)
1617 .StartsWith("add.f64.p", true)
1618 .StartsWith("inc.32.p", true)
1619 .StartsWith("dec.32.p", true)
1620 .Default(false);
1621 else if (Name.consume_front("bitcast."))
1622 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1623 Expand =
1624 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1625 else if (Name.consume_front("rotate."))
1626 // nvvm.rotate.{b32,b64,right.b64}
1627 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1628 else if (Name.consume_front("ptr.gen.to."))
1629 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1630 Expand = consumeNVVMPtrAddrSpace(Name);
1631 else if (Name.consume_front("ptr."))
1632 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1633 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1634 else if (Name.consume_front("ldg.global."))
1635 // nvvm.ldg.global.{i,p,f}
1636 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1637 Name.starts_with("p."));
1638 else
1639 Expand = StringSwitch<bool>(Name)
1640 .Case("barrier0", true)
1641 .Case("barrier.n", true)
1642 .Case("barrier.sync.cnt", true)
1643 .Case("barrier.sync", true)
1644 .Case("barrier", true)
1645 .Case("bar.sync", true)
1646 .Case("barrier0.popc", true)
1647 .Case("barrier0.and", true)
1648 .Case("barrier0.or", true)
1649 .Case("clz.ll", true)
1650 .Case("popc.ll", true)
1651 .Case("h2f", true)
1652 .Case("swap.lo.hi.b64", true)
1653 .Case("tanh.approx.f32", true)
1654 .Default(false);
1655
1656 if (Expand) {
1657 NewFn = nullptr;
1658 return true;
1659 }
1660 break; // No other 'nvvm.*'.
1661 }
1662 break;
1663 }
1664 case 'o':
1665 if (Name.starts_with("objectsize.")) {
1666 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1667 if (F->arg_size() == 2 || F->arg_size() == 3) {
1668 rename(F);
1669 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1670 Intrinsic::objectsize, Tys);
1671 return true;
1672 }
1673 }
1674 break;
1675
1676 case 'p':
1677 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1678 rename(F);
1680 F->getParent(), Intrinsic::ptr_annotation,
1681 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1682 return true;
1683 }
1684 break;
1685
1686 case 'r': {
1687 if (Name.consume_front("riscv.")) {
1690 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1691 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1692 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1693 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1696 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1697 rename(F);
1698 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1699 return true;
1700 }
1701 break; // No other applicable upgrades.
1702 }
1703
1704 ID = StringSwitch<Intrinsic::ID>(Name)
1705 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1706 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1709 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1710 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1711 rename(F);
1712 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1713 return true;
1714 }
1715 break; // No other applicable upgrades.
1716 }
1717
1718 ID = StringSwitch<Intrinsic::ID>(Name)
1719 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1720 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1721 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1722 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1723 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1724 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1727 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1728 rename(F);
1729 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1730 return true;
1731 }
1732 break; // No other applicable upgrades.
1733 }
1734 break; // No other 'riscv.*' intrinsics
1735 }
1736 } break;
1737
1738 case 's':
1739 if (Name == "stackprotectorcheck") {
1740 NewFn = nullptr;
1741 return true;
1742 }
1743 break;
1744
1745 case 't':
1746 if (Name == "thread.pointer") {
1748 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1749 return true;
1750 }
1751 break;
1752
1753 case 'v': {
1754 if (Name == "var.annotation" && F->arg_size() == 4) {
1755 rename(F);
1756 NewFn = Intrinsic::getOrInsertDeclaration(
1757 F->getParent(), Intrinsic::var_annotation,
1758 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1759 return true;
1760 }
1761 if (Name.consume_front("vector.splice")) {
1762 if (Name.starts_with(".left") || Name.starts_with(".right"))
1763 break;
1764 return true;
1765 }
1766 break;
1767 }
1768
1769 case 'w':
1770 if (Name.consume_front("wasm.")) {
1773 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1774 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1775 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1778 rename(F);
1779 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1780 F->getReturnType());
1781 return true;
1782 }
1783
1784 if (Name.consume_front("dot.i8x16.i7x16.")) {
1786 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1787 .Case("add.signed",
1788 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1789 .Default(Intrinsic::not_intrinsic);
1790 if (ID != Intrinsic::not_intrinsic) {
1791 rename(F);
1792 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1793 return true;
1794 }
1795 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1796 }
1797 break; // No other 'wasm.*'.
1798 }
1799 break;
1800
1801 case 'x':
1802 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1803 return true;
1804 }
1805
1806 auto *ST = dyn_cast<StructType>(F->getReturnType());
1807 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1808 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1809 // Replace return type with literal non-packed struct. Only do this for
1810 // intrinsics declared to return a struct, not for intrinsics with
1811 // overloaded return type, in which case the exact struct type will be
1812 // mangled into the name.
1813 SmallVector<Intrinsic::IITDescriptor> Desc;
1814 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1815 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1816 auto *FT = F->getFunctionType();
1817 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1818 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1819 std::string Name = F->getName().str();
1820 rename(F);
1821 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1822 Name, F->getParent());
1823
1824 // The new function may also need remangling.
1825 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1826 NewFn = *Result;
1827 return true;
1828 }
1829 }
1830
1831 // Remangle our intrinsic since we upgrade the mangling
1832 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1833 if (Result != std::nullopt) {
1834 NewFn = *Result;
1835 return true;
1836 }
1837
1838 // This may not belong here. This function is effectively being overloaded
1839 // to both detect an intrinsic which needs upgrading, and to provide the
1840 // upgraded form of the intrinsic. We should perhaps have two separate
1841 // functions for this.
1842 return false;
1843}
1844
1845 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1846 bool CanUpgradeDebugIntrinsicsToRecords) {
1847 NewFn = nullptr;
1848 bool Upgraded =
1849 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1850
1851 // Upgrade intrinsic attributes. This does not change the function.
1852 if (NewFn)
1853 F = NewFn;
1854 if (Intrinsic::ID id = F->getIntrinsicID()) {
1855 // Only do this if the intrinsic signature is valid.
1856 SmallVector<Type *> OverloadTys;
1857 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1858 F->setAttributes(
1859 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1860 }
1861 return Upgraded;
1862}
1863
1864 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1865 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1866 GV->getName() == "llvm.global_dtors")) ||
1867 !GV->hasInitializer())
1868 return nullptr;
1869 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1870 if (!ATy)
1871 return nullptr;
1872 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1873 if (!STy || STy->getNumElements() != 2)
1874 return nullptr;
1875
1876 LLVMContext &C = GV->getContext();
1877 IRBuilder<> IRB(C);
1878 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1879 IRB.getPtrTy());
1880 Constant *Init = GV->getInitializer();
1881 unsigned N = Init->getNumOperands();
1882 std::vector<Constant *> NewCtors(N);
1883 for (unsigned i = 0; i != N; ++i) {
1884 auto Ctor = cast<Constant>(Init->getOperand(i));
1885 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1886 Ctor->getAggregateElement(1),
1887 Constant::getNullValue(IRB.getPtrTy()));
1888 }
1889 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1890
1891 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1892 NewInit, GV->getName());
1893}
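// For example (illustrative): a two-field entry in @llvm.global_ctors such as
//   { i32, ptr } { i32 65535, ptr @ctor }
// is rebuilt by the loop above as the current three-field form
//   { i32, ptr, ptr } { i32 65535, ptr @ctor, ptr null }
// where the appended null pointer is the "associated data" field.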
1894
1895// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1896 // to byte shuffles.
1897 static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1898 unsigned Shift) {
1899 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1900 unsigned NumElts = ResultTy->getNumElements() * 8;
1901
1902 // Bitcast from a 64-bit element type to a byte element type.
1903 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1904 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1905
1906 // We'll be shuffling in zeroes.
1907 Value *Res = Constant::getNullValue(VecTy);
1908
1909 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1910 // we'll just return the zero vector.
1911 if (Shift < 16) {
1912 int Idxs[64];
1913 // 256/512-bit version is split into 2/4 16-byte lanes.
1914 for (unsigned l = 0; l != NumElts; l += 16)
1915 for (unsigned i = 0; i != 16; ++i) {
1916 unsigned Idx = NumElts + i - Shift;
1917 if (Idx < NumElts)
1918 Idx -= NumElts - 16; // end of lane, switch operand.
1919 Idxs[l + i] = Idx + l;
1920 }
1921
1922 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1923 }
1924
1925 // Bitcast back to a 64-bit element type.
1926 return Builder.CreateBitCast(Res, ResultTy, "cast");
1927}
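// Worked example for the lowering above (values assumed): with a 128-bit
// vector (NumElts = 16 bytes) and Shift = 4, the loop produces indices
// 12..27 into the pair <zero, Op>, so the result is four zero bytes followed
// by bytes 0..11 of Op, i.e. a whole-vector left shift by 4 bytes.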
1928
1929// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1930 // to byte shuffles.
1931 static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1932 unsigned Shift) {
1933 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1934 unsigned NumElts = ResultTy->getNumElements() * 8;
1935
1936 // Bitcast from a 64-bit element type to a byte element type.
1937 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1938 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1939
1940 // We'll be shuffling in zeroes.
1941 Value *Res = Constant::getNullValue(VecTy);
1942
1943 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1944 // we'll just return the zero vector.
1945 if (Shift < 16) {
1946 int Idxs[64];
1947 // 256/512-bit version is split into 2/4 16-byte lanes.
1948 for (unsigned l = 0; l != NumElts; l += 16)
1949 for (unsigned i = 0; i != 16; ++i) {
1950 unsigned Idx = i + Shift;
1951 if (Idx >= 16)
1952 Idx += NumElts - 16; // end of lane, switch operand.
1953 Idxs[l + i] = Idx + l;
1954 }
1955
1956 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1957 }
1958
1959 // Bitcast back to a 64-bit element type.
1960 return Builder.CreateBitCast(Res, ResultTy, "cast");
1961}
1962
1963static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1964 unsigned NumElts) {
1965 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1966 llvm::VectorType *MaskTy = FixedVectorType::get(
1967 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1968 Mask = Builder.CreateBitCast(Mask, MaskTy);
1969
1970 // If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
1971 // i8 and we need to extract down to the right number of elements.
1972 if (NumElts <= 4) {
1973 int Indices[4];
1974 for (unsigned i = 0; i != NumElts; ++i)
1975 Indices[i] = i;
1976 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1977 "extract");
1978 }
1979
1980 return Mask;
1981}
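// A short sketch of getX86MaskVec (assumed values): for an i8 mask and
// NumElts = 4, the mask is bitcast to <8 x i1> and then shuffled with
// indices {0, 1, 2, 3} to extract the low four bits as a <4 x i1>.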
1982
1983static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1984 Value *Op1) {
1985 // If the mask is all ones just emit the first operation.
1986 if (const auto *C = dyn_cast<Constant>(Mask))
1987 if (C->isAllOnesValue())
1988 return Op0;
1989
1990 Mask = getX86MaskVec(Builder, Mask,
1991 cast<FixedVectorType>(Op0->getType())->getNumElements());
1992 return Builder.CreateSelect(Mask, Op0, Op1);
1993}
1994
1995static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1996 Value *Op1) {
1997 // If the mask is all ones just emit the first operation.
1998 if (const auto *C = dyn_cast<Constant>(Mask))
1999 if (C->isAllOnesValue())
2000 return Op0;
2001
2002 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2003 Mask->getType()->getIntegerBitWidth());
2004 Mask = Builder.CreateBitCast(Mask, MaskTy);
2005 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2006 return Builder.CreateSelect(Mask, Op0, Op1);
2007}
2008
2009// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2010// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2011 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
2012 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
2013 Value *Op1, Value *Shift,
2014 Value *Passthru, Value *Mask,
2015 bool IsVALIGN) {
2016 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2017
2018 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2019 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2020 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2021 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2022
2023 // Mask the immediate for VALIGN.
2024 if (IsVALIGN)
2025 ShiftVal &= (NumElts - 1);
2026
2027 // If palignr is shifting the pair of vectors more than the size of two
2028 // lanes, emit zero.
2029 if (ShiftVal >= 32)
2030 return llvm::Constant::getNullValue(Op0->getType());
2031
2032 // If palignr is shifting the pair of input vectors more than one lane,
2033 // but less than two lanes, convert to shifting in zeroes.
2034 if (ShiftVal > 16) {
2035 ShiftVal -= 16;
2036 Op1 = Op0;
2037 Op0 = llvm::Constant::getNullValue(Op0->getType());
2038 }
2039
2040 int Indices[64];
2041 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2042 for (unsigned l = 0; l < NumElts; l += 16) {
2043 for (unsigned i = 0; i != 16; ++i) {
2044 unsigned Idx = ShiftVal + i;
2045 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2046 Idx += NumElts - 16; // End of lane, switch operand.
2047 Indices[l + i] = Idx + l;
2048 }
2049 }
2050
2051 Value *Align = Builder.CreateShuffleVector(
2052 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2053
2054 return emitX86Select(Builder, Mask, Align, Passthru);
2055}
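// Worked example (values assumed): 128-bit PALIGNR with ShiftVal = 20 takes
// the branch above, becoming Op1 = Op0, Op0 = zero, ShiftVal = 4; the shuffle
// then yields bytes 4..15 of the original Op0 followed by four zero bytes,
// matching the hardware behavior for immediates between 16 and 31.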
2056
2057 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
2058 bool ZeroMask, bool IndexForm) {
2059 Type *Ty = CI.getType();
2060 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2061 unsigned EltWidth = Ty->getScalarSizeInBits();
2062 bool IsFloat = Ty->isFPOrFPVectorTy();
2063 Intrinsic::ID IID;
2064 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2065 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2066 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2067 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2068 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2069 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2070 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2071 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2072 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2073 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2074 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2075 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2076 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2077 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2078 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2079 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2080 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2081 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2082 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2083 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2084 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2085 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2086 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2087 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2088 else if (VecWidth == 128 && EltWidth == 16)
2089 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2090 else if (VecWidth == 256 && EltWidth == 16)
2091 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2092 else if (VecWidth == 512 && EltWidth == 16)
2093 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2094 else if (VecWidth == 128 && EltWidth == 8)
2095 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2096 else if (VecWidth == 256 && EltWidth == 8)
2097 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2098 else if (VecWidth == 512 && EltWidth == 8)
2099 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2100 else
2101 llvm_unreachable("Unexpected intrinsic");
2102
2103 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2104 CI.getArgOperand(2) };
2105
2106 // If this isn't index form we need to swap operand 0 and 1.
2107 if (!IndexForm)
2108 std::swap(Args[0], Args[1]);
2109
2110 Value *V = Builder.CreateIntrinsic(IID, Args);
2111 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2112 : Builder.CreateBitCast(CI.getArgOperand(1),
2113 Ty);
2114 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2115}
2116
2117 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2118 Intrinsic::ID IID) {
2119 Type *Ty = CI.getType();
2120 Value *Op0 = CI.getOperand(0);
2121 Value *Op1 = CI.getOperand(1);
2122 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2123
2124 if (CI.arg_size() == 4) { // For masked intrinsics.
2125 Value *VecSrc = CI.getOperand(2);
2126 Value *Mask = CI.getOperand(3);
2127 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2128 }
2129 return Res;
2130}
2131
2132 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2133 bool IsRotateRight) {
2134 Type *Ty = CI.getType();
2135 Value *Src = CI.getArgOperand(0);
2136 Value *Amt = CI.getArgOperand(1);
2137
2138 // Amount may be scalar immediate, in which case create a splat vector.
2139 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2140 // we only care about the lowest log2 bits anyway.
2141 if (Amt->getType() != Ty) {
2142 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2143 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2144 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2145 }
2146
2147 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2148 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2149
2150 if (CI.arg_size() == 4) { // For masked intrinsics.
2151 Value *VecSrc = CI.getOperand(2);
2152 Value *Mask = CI.getOperand(3);
2153 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2154 }
2155 return Res;
2156}
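// The lowering above uses the identity rot{l,r}(x, a) == fsh{l,r}(x, x, a):
// funnel-shifting a value with itself on both inputs is exactly a rotate, so
// a vector rotate-left by %a becomes @llvm.fshl with operands
// (%x, %x, splat %a) (an illustrative mapping, not tied to one intrinsic).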
2157
2158static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2159 bool IsSigned) {
2160 Type *Ty = CI.getType();
2161 Value *LHS = CI.getArgOperand(0);
2162 Value *RHS = CI.getArgOperand(1);
2163
2164 CmpInst::Predicate Pred;
2165 switch (Imm) {
2166 case 0x0:
2167 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2168 break;
2169 case 0x1:
2170 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2171 break;
2172 case 0x2:
2173 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2174 break;
2175 case 0x3:
2176 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2177 break;
2178 case 0x4:
2179 Pred = ICmpInst::ICMP_EQ;
2180 break;
2181 case 0x5:
2182 Pred = ICmpInst::ICMP_NE;
2183 break;
2184 case 0x6:
2185 return Constant::getNullValue(Ty); // FALSE
2186 case 0x7:
2187 return Constant::getAllOnesValue(Ty); // TRUE
2188 default:
2189 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2190 }
2191
2192 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2193 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2194 return Ext;
2195}
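// Summary of the XOP vpcom immediate decoding above: 0 -> lt, 1 -> le,
// 2 -> gt, 3 -> ge, 4 -> eq, 5 -> ne, 6 -> always false (zero vector),
// 7 -> always true (all-ones vector), with IsSigned selecting the signed or
// unsigned icmp predicate family.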
2196
2197 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2198 bool IsShiftRight, bool ZeroMask) {
2199 Type *Ty = CI.getType();
2200 Value *Op0 = CI.getArgOperand(0);
2201 Value *Op1 = CI.getArgOperand(1);
2202 Value *Amt = CI.getArgOperand(2);
2203
2204 if (IsShiftRight)
2205 std::swap(Op0, Op1);
2206
2207 // Amount may be scalar immediate, in which case create a splat vector.
2208 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2209 // we only care about the lowest log2 bits anyway.
2210 if (Amt->getType() != Ty) {
2211 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2212 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2213 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2214 }
2215
2216 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2217 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2218
2219 unsigned NumArgs = CI.arg_size();
2220 if (NumArgs >= 4) { // For masked intrinsics.
2221 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2222 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2223 CI.getArgOperand(0);
2224 Value *Mask = CI.getOperand(NumArgs - 1);
2225 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2226 }
2227 return Res;
2228}
2229
2230 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2231 Value *Mask, bool Aligned) {
2232 const Align Alignment =
2233 Aligned
2234 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2235 : Align(1);
2236
2237 // If the mask is all ones just emit a regular store.
2238 if (const auto *C = dyn_cast<Constant>(Mask))
2239 if (C->isAllOnesValue())
2240 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2241
2242 // Convert the mask from an integer type to a vector of i1.
2243 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2244 Mask = getX86MaskVec(Builder, Mask, NumElts);
2245 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2246}
2247
2248 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2249 Value *Passthru, Value *Mask, bool Aligned) {
2250 Type *ValTy = Passthru->getType();
2251 const Align Alignment =
2252 Aligned
2253 ? Align(
2254 ValTy->getPrimitiveSizeInBits().getFixedValue() /
2255 8)
2256 : Align(1);
2257
2258 // If the mask is all ones just emit a regular load.
2259 if (const auto *C = dyn_cast<Constant>(Mask))
2260 if (C->isAllOnesValue())
2261 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2262
2263 // Convert the mask from an integer type to a vector of i1.
2264 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2265 Mask = getX86MaskVec(Builder, Mask, NumElts);
2266 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2267}
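// Illustrative use of the two helpers above (hypothetical IR): a masked
// AVX-512 load like
//   @llvm.x86.avx512.mask.loadu.ps.512(ptr %p, <16 x float> %src, i16 %m)
// becomes an @llvm.masked.load with the i16 mask converted to <16 x i1> via
// getX86MaskVec and %src used as the passthrough operand.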
2268
2269static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2270 Type *Ty = CI.getType();
2271 Value *Op0 = CI.getArgOperand(0);
2272 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2273 {Op0, Builder.getInt1(false)});
2274 if (CI.arg_size() == 3)
2275 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2276 return Res;
2277}
2278
2279static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2280 Type *Ty = CI.getType();
2281
2282 // Arguments have a vXi32 type so cast to vXi64.
2283 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2284 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2285
2286 if (IsSigned) {
2287 // Shift left then arithmetic shift right.
2288 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2289 LHS = Builder.CreateShl(LHS, ShiftAmt);
2290 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2291 RHS = Builder.CreateShl(RHS, ShiftAmt);
2292 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2293 } else {
2294 // Clear the upper bits.
2295 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2296 LHS = Builder.CreateAnd(LHS, Mask);
2297 RHS = Builder.CreateAnd(RHS, Mask);
2298 }
2299
2300 Value *Res = Builder.CreateMul(LHS, RHS);
2301
2302 if (CI.arg_size() == 4)
2303 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2304
2305 return Res;
2306}
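// Worked example for the signed path above (assumed lane value): if a vXi64
// lane holds 0x00000000FFFFFFFF, shl 32 gives 0xFFFFFFFF00000000 and ashr 32
// gives 0xFFFFFFFFFFFFFFFF, i.e. the low 32 bits sign-extended to -1, which
// is exactly the operand form PMULDQ's signed 32x32->64 multiply expects.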
2307
2308 // Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
2309 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2310 Value *Mask) {
2311 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2312 if (Mask) {
2313 const auto *C = dyn_cast<Constant>(Mask);
2314 if (!C || !C->isAllOnesValue())
2315 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2316 }
2317
2318 if (NumElts < 8) {
2319 int Indices[8];
2320 for (unsigned i = 0; i != NumElts; ++i)
2321 Indices[i] = i;
2322 for (unsigned i = NumElts; i != 8; ++i)
2323 Indices[i] = NumElts + i % NumElts;
2324 Vec = Builder.CreateShuffleVector(Vec,
2325 Constant::getNullValue(Vec->getType()),
2326 Indices);
2327 }
2328 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2329}
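// Example of the widening above (assumed NumElts = 4): a <4 x i1> compare
// result is shuffled together with a zero vector up to <8 x i1> (indices
// 4..7 select zeroes), then bitcast to i8 so the scalar mask matches the
// original intrinsic's return type.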
2330
2331 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2332 unsigned CC, bool Signed) {
2333 Value *Op0 = CI.getArgOperand(0);
2334 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2335
2336 Value *Cmp;
2337 if (CC == 3) {
2338 Cmp = Constant::getNullValue(
2339 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2340 } else if (CC == 7) {
2341 Cmp = Constant::getAllOnesValue(
2342 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2343 } else {
2344 ICmpInst::Predicate Pred;
2345 switch (CC) {
2346 default: llvm_unreachable("Unknown condition code");
2347 case 0: Pred = ICmpInst::ICMP_EQ; break;
2348 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2349 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2350 case 4: Pred = ICmpInst::ICMP_NE; break;
2351 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2352 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2353 }
2354 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2355 }
2356
2357 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2358
2359 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2360}
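// Condition-code key for the compare upgrade above: 0 eq, 1 lt, 2 le,
// 3 always false, 4 ne, 5 ge, 6 gt, 7 always true; Signed picks the signed
// or unsigned predicate family, mirroring the x86 {p,u}cmp immediate
// encoding.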
2361
2362 // Replace a masked intrinsic with an older unmasked intrinsic.
2363 static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2364 Intrinsic::ID IID) {
2365 Value *Rep =
2366 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2367 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2368}
2369
2370 static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2371 Value* A = CI.getArgOperand(0);
2372 Value* B = CI.getArgOperand(1);
2373 Value* Src = CI.getArgOperand(2);
2374 Value* Mask = CI.getArgOperand(3);
2375
2376 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2377 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2378 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2379 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2380 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2381 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2382}
2383
2384 static Value *UpgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2385 Value* Op = CI.getArgOperand(0);
2386 Type* ReturnOp = CI.getType();
2387 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2388 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2389 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2390}
2391
2392 // Replace intrinsic with unmasked version and a select.
2393 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2394 CallBase &CI, Value *&Rep) {
2395 Name = Name.substr(12); // Remove avx512.mask.
2396
2397 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2398 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2399 Intrinsic::ID IID;
2400 if (Name.starts_with("max.p")) {
2401 if (VecWidth == 128 && EltWidth == 32)
2402 IID = Intrinsic::x86_sse_max_ps;
2403 else if (VecWidth == 128 && EltWidth == 64)
2404 IID = Intrinsic::x86_sse2_max_pd;
2405 else if (VecWidth == 256 && EltWidth == 32)
2406 IID = Intrinsic::x86_avx_max_ps_256;
2407 else if (VecWidth == 256 && EltWidth == 64)
2408 IID = Intrinsic::x86_avx_max_pd_256;
2409 else
2410 llvm_unreachable("Unexpected intrinsic");
2411 } else if (Name.starts_with("min.p")) {
2412 if (VecWidth == 128 && EltWidth == 32)
2413 IID = Intrinsic::x86_sse_min_ps;
2414 else if (VecWidth == 128 && EltWidth == 64)
2415 IID = Intrinsic::x86_sse2_min_pd;
2416 else if (VecWidth == 256 && EltWidth == 32)
2417 IID = Intrinsic::x86_avx_min_ps_256;
2418 else if (VecWidth == 256 && EltWidth == 64)
2419 IID = Intrinsic::x86_avx_min_pd_256;
2420 else
2421 llvm_unreachable("Unexpected intrinsic");
2422 } else if (Name.starts_with("pshuf.b.")) {
2423 if (VecWidth == 128)
2424 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2425 else if (VecWidth == 256)
2426 IID = Intrinsic::x86_avx2_pshuf_b;
2427 else if (VecWidth == 512)
2428 IID = Intrinsic::x86_avx512_pshuf_b_512;
2429 else
2430 llvm_unreachable("Unexpected intrinsic");
2431 } else if (Name.starts_with("pmul.hr.sw.")) {
2432 if (VecWidth == 128)
2433 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2434 else if (VecWidth == 256)
2435 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2436 else if (VecWidth == 512)
2437 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2438 else
2439 llvm_unreachable("Unexpected intrinsic");
2440 } else if (Name.starts_with("pmulh.w.")) {
2441 if (VecWidth == 128)
2442 IID = Intrinsic::x86_sse2_pmulh_w;
2443 else if (VecWidth == 256)
2444 IID = Intrinsic::x86_avx2_pmulh_w;
2445 else if (VecWidth == 512)
2446 IID = Intrinsic::x86_avx512_pmulh_w_512;
2447 else
2448 llvm_unreachable("Unexpected intrinsic");
2449 } else if (Name.starts_with("pmulhu.w.")) {
2450 if (VecWidth == 128)
2451 IID = Intrinsic::x86_sse2_pmulhu_w;
2452 else if (VecWidth == 256)
2453 IID = Intrinsic::x86_avx2_pmulhu_w;
2454 else if (VecWidth == 512)
2455 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2456 else
2457 llvm_unreachable("Unexpected intrinsic");
2458 } else if (Name.starts_with("pmaddw.d.")) {
2459 if (VecWidth == 128)
2460 IID = Intrinsic::x86_sse2_pmadd_wd;
2461 else if (VecWidth == 256)
2462 IID = Intrinsic::x86_avx2_pmadd_wd;
2463 else if (VecWidth == 512)
2464 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2465 else
2466 llvm_unreachable("Unexpected intrinsic");
2467 } else if (Name.starts_with("pmaddubs.w.")) {
2468 if (VecWidth == 128)
2469 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2470 else if (VecWidth == 256)
2471 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2472 else if (VecWidth == 512)
2473 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2474 else
2475 llvm_unreachable("Unexpected intrinsic");
2476 } else if (Name.starts_with("packsswb.")) {
2477 if (VecWidth == 128)
2478 IID = Intrinsic::x86_sse2_packsswb_128;
2479 else if (VecWidth == 256)
2480 IID = Intrinsic::x86_avx2_packsswb;
2481 else if (VecWidth == 512)
2482 IID = Intrinsic::x86_avx512_packsswb_512;
2483 else
2484 llvm_unreachable("Unexpected intrinsic");
2485 } else if (Name.starts_with("packssdw.")) {
2486 if (VecWidth == 128)
2487 IID = Intrinsic::x86_sse2_packssdw_128;
2488 else if (VecWidth == 256)
2489 IID = Intrinsic::x86_avx2_packssdw;
2490 else if (VecWidth == 512)
2491 IID = Intrinsic::x86_avx512_packssdw_512;
2492 else
2493 llvm_unreachable("Unexpected intrinsic");
2494 } else if (Name.starts_with("packuswb.")) {
2495 if (VecWidth == 128)
2496 IID = Intrinsic::x86_sse2_packuswb_128;
2497 else if (VecWidth == 256)
2498 IID = Intrinsic::x86_avx2_packuswb;
2499 else if (VecWidth == 512)
2500 IID = Intrinsic::x86_avx512_packuswb_512;
2501 else
2502 llvm_unreachable("Unexpected intrinsic");
2503 } else if (Name.starts_with("packusdw.")) {
2504 if (VecWidth == 128)
2505 IID = Intrinsic::x86_sse41_packusdw;
2506 else if (VecWidth == 256)
2507 IID = Intrinsic::x86_avx2_packusdw;
2508 else if (VecWidth == 512)
2509 IID = Intrinsic::x86_avx512_packusdw_512;
2510 else
2511 llvm_unreachable("Unexpected intrinsic");
2512 } else if (Name.starts_with("vpermilvar.")) {
2513 if (VecWidth == 128 && EltWidth == 32)
2514 IID = Intrinsic::x86_avx_vpermilvar_ps;
2515 else if (VecWidth == 128 && EltWidth == 64)
2516 IID = Intrinsic::x86_avx_vpermilvar_pd;
2517 else if (VecWidth == 256 && EltWidth == 32)
2518 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2519 else if (VecWidth == 256 && EltWidth == 64)
2520 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2521 else if (VecWidth == 512 && EltWidth == 32)
2522 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2523 else if (VecWidth == 512 && EltWidth == 64)
2524 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2525 else
2526 llvm_unreachable("Unexpected intrinsic");
2527 } else if (Name == "cvtpd2dq.256") {
2528 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2529 } else if (Name == "cvtpd2ps.256") {
2530 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2531 } else if (Name == "cvttpd2dq.256") {
2532 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2533 } else if (Name == "cvttps2dq.128") {
2534 IID = Intrinsic::x86_sse2_cvttps2dq;
2535 } else if (Name == "cvttps2dq.256") {
2536 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2537 } else if (Name.starts_with("permvar.")) {
2538 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2539 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2540 IID = Intrinsic::x86_avx2_permps;
2541 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2542 IID = Intrinsic::x86_avx2_permd;
2543 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2544 IID = Intrinsic::x86_avx512_permvar_df_256;
2545 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2546 IID = Intrinsic::x86_avx512_permvar_di_256;
2547 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2548 IID = Intrinsic::x86_avx512_permvar_sf_512;
2549 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2550 IID = Intrinsic::x86_avx512_permvar_si_512;
2551 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2552 IID = Intrinsic::x86_avx512_permvar_df_512;
2553 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2554 IID = Intrinsic::x86_avx512_permvar_di_512;
2555 else if (VecWidth == 128 && EltWidth == 16)
2556 IID = Intrinsic::x86_avx512_permvar_hi_128;
2557 else if (VecWidth == 256 && EltWidth == 16)
2558 IID = Intrinsic::x86_avx512_permvar_hi_256;
2559 else if (VecWidth == 512 && EltWidth == 16)
2560 IID = Intrinsic::x86_avx512_permvar_hi_512;
2561 else if (VecWidth == 128 && EltWidth == 8)
2562 IID = Intrinsic::x86_avx512_permvar_qi_128;
2563 else if (VecWidth == 256 && EltWidth == 8)
2564 IID = Intrinsic::x86_avx512_permvar_qi_256;
2565 else if (VecWidth == 512 && EltWidth == 8)
2566 IID = Intrinsic::x86_avx512_permvar_qi_512;
2567 else
2568 llvm_unreachable("Unexpected intrinsic");
2569 } else if (Name.starts_with("dbpsadbw.")) {
2570 if (VecWidth == 128)
2571 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2572 else if (VecWidth == 256)
2573 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2574 else if (VecWidth == 512)
2575 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2576 else
2577 llvm_unreachable("Unexpected intrinsic");
2578 } else if (Name.starts_with("pmultishift.qb.")) {
2579 if (VecWidth == 128)
2580 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2581 else if (VecWidth == 256)
2582 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2583 else if (VecWidth == 512)
2584 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2585 else
2586 llvm_unreachable("Unexpected intrinsic");
2587 } else if (Name.starts_with("conflict.")) {
2588 if (Name[9] == 'd' && VecWidth == 128)
2589 IID = Intrinsic::x86_avx512_conflict_d_128;
2590 else if (Name[9] == 'd' && VecWidth == 256)
2591 IID = Intrinsic::x86_avx512_conflict_d_256;
2592 else if (Name[9] == 'd' && VecWidth == 512)
2593 IID = Intrinsic::x86_avx512_conflict_d_512;
2594 else if (Name[9] == 'q' && VecWidth == 128)
2595 IID = Intrinsic::x86_avx512_conflict_q_128;
2596 else if (Name[9] == 'q' && VecWidth == 256)
2597 IID = Intrinsic::x86_avx512_conflict_q_256;
2598 else if (Name[9] == 'q' && VecWidth == 512)
2599 IID = Intrinsic::x86_avx512_conflict_q_512;
2600 else
2601 llvm_unreachable("Unexpected intrinsic");
2602 } else if (Name.starts_with("pavg.")) {
2603 if (Name[5] == 'b' && VecWidth == 128)
2604 IID = Intrinsic::x86_sse2_pavg_b;
2605 else if (Name[5] == 'b' && VecWidth == 256)
2606 IID = Intrinsic::x86_avx2_pavg_b;
2607 else if (Name[5] == 'b' && VecWidth == 512)
2608 IID = Intrinsic::x86_avx512_pavg_b_512;
2609 else if (Name[5] == 'w' && VecWidth == 128)
2610 IID = Intrinsic::x86_sse2_pavg_w;
2611 else if (Name[5] == 'w' && VecWidth == 256)
2612 IID = Intrinsic::x86_avx2_pavg_w;
2613 else if (Name[5] == 'w' && VecWidth == 512)
2614 IID = Intrinsic::x86_avx512_pavg_w_512;
2615 else
2616 llvm_unreachable("Unexpected intrinsic");
2617 } else
2618 return false;
2619
2620 SmallVector<Value *, 4> Args(CI.args());
2621 Args.pop_back();
2622 Args.pop_back();
2623 Rep = Builder.CreateIntrinsic(IID, Args);
2624 unsigned NumArgs = CI.arg_size();
2625 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2626 CI.getArgOperand(NumArgs - 2));
2627 return true;
2628}
2629
2630 /// Upgrade the comment in a call to inline asm that represents an ObjC
2631 /// retain/release marker.
2632void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2633 size_t Pos;
2634 if (AsmStr->find("mov\tfp") == 0 &&
2635 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2636 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2637 AsmStr->replace(Pos, 1, ";");
2638 }
2639}
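// For instance (hypothetical string): an asm string such as
//   "mov\tfp, fp\t\t# marker for objc_retainAutoreleaseReturnValue"
// has the '#' of "# marker" rewritten to ';' so the annotation stays a
// comment under the current assembler syntax.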
2640
2641 static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2642 Function *F, IRBuilder<> &Builder) {
2643 Value *Rep = nullptr;
2644
2645 if (Name == "abs.i" || Name == "abs.ll") {
2646 Value *Arg = CI->getArgOperand(0);
2647 Value *Neg = Builder.CreateNeg(Arg, "neg");
2648 Value *Cmp = Builder.CreateICmpSGE(
2649 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2650 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2651 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2652 Type *Ty = (Name == "abs.bf16")
2653 ? Builder.getBFloatTy()
2654 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2655 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2656 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2657 Rep = Builder.CreateBitCast(Abs, CI->getType());
2658 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2659 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2660 : Intrinsic::nvvm_fabs;
2661 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2662 } else if (Name.consume_front("ex2.approx.")) {
2663 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2664 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2665 : Intrinsic::nvvm_ex2_approx;
2666 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2667 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2668 Name.starts_with("atomic.load.add.f64.p")) {
2669 Value *Ptr = CI->getArgOperand(0);
2670 Value *Val = CI->getArgOperand(1);
2671 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2672 AtomicOrdering::SequentiallyConsistent);
2673 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2674 Name.starts_with("atomic.load.dec.32.p")) {
2675 Value *Ptr = CI->getArgOperand(0);
2676 Value *Val = CI->getArgOperand(1);
2677 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2679 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2680 AtomicOrdering::SequentiallyConsistent);
2681 } else if (Name == "clz.ll") {
2682 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2683 Value *Arg = CI->getArgOperand(0);
2684 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2685 {Arg, Builder.getFalse()},
2686 /*FMFSource=*/nullptr, "ctlz");
2687 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2688 } else if (Name == "popc.ll") {
2689 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2690 // i64.
2691 Value *Arg = CI->getArgOperand(0);
2692 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2693 Arg, /*FMFSource=*/nullptr, "ctpop");
2694 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2695 } else if (Name == "h2f") {
2696 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2697 {Builder.getFloatTy()}, CI->getArgOperand(0),
2698 /*FMFSource=*/nullptr, "h2f");
2699 } else if (Name.consume_front("bitcast.") &&
2700 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2701 Name == "d2ll")) {
2702 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2703 } else if (Name == "rotate.b32") {
2704 Value *Arg = CI->getOperand(0);
2705 Value *ShiftAmt = CI->getOperand(1);
2706 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2707 {Arg, Arg, ShiftAmt});
2708 } else if (Name == "rotate.b64") {
2709 Type *Int64Ty = Builder.getInt64Ty();
2710 Value *Arg = CI->getOperand(0);
2711 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2712 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2713 {Arg, Arg, ZExtShiftAmt});
2714 } else if (Name == "rotate.right.b64") {
2715 Type *Int64Ty = Builder.getInt64Ty();
2716 Value *Arg = CI->getOperand(0);
2717 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2718 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2719 {Arg, Arg, ZExtShiftAmt});
2720 } else if (Name == "swap.lo.hi.b64") {
2721 Type *Int64Ty = Builder.getInt64Ty();
2722 Value *Arg = CI->getOperand(0);
2723 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2724 {Arg, Arg, Builder.getInt64(32)});
2725 } else if ((Name.consume_front("ptr.gen.to.") &&
2726 consumeNVVMPtrAddrSpace(Name)) ||
2727 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2728 Name.starts_with(".to.gen"))) {
2729 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2730 } else if (Name.consume_front("ldg.global")) {
2731 Value *Ptr = CI->getArgOperand(0);
2732 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2733 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2734 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2735 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2736 MDNode *MD = MDNode::get(Builder.getContext(), {});
2737 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2738 return LD;
2739 } else if (Name == "tanh.approx.f32") {
2740 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2741 FastMathFlags FMF;
2742 FMF.setApproxFunc();
2743 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2744 FMF);
2745 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2746 Value *Arg =
2747 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2748 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2749 {}, {Arg});
2750 } else if (Name == "barrier") {
2751 Rep = Builder.CreateIntrinsic(
2752 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2753 {CI->getArgOperand(0), CI->getArgOperand(1)});
2754 } else if (Name == "barrier.sync") {
2755 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2756 {CI->getArgOperand(0)});
2757 } else if (Name == "barrier.sync.cnt") {
2758 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2759 {CI->getArgOperand(0), CI->getArgOperand(1)});
2760 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2761 Name == "barrier0.or") {
2762 Value *C = CI->getArgOperand(0);
2763 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2764
2765 Intrinsic::ID IID =
2766 StringSwitch<Intrinsic::ID>(Name)
2767 .Case("barrier0.popc",
2768 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2769 .Case("barrier0.and",
2770 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2771 .Case("barrier0.or",
2772 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2773 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2774 Rep = Builder.CreateZExt(Bar, CI->getType());
2775 } else {
2776 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2777 if (IID != Intrinsic::not_intrinsic &&
2778 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2779 rename(F);
2780 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2781 SmallVector<Value *, 2> Args;
2782 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2783 Value *Arg = CI->getArgOperand(I);
2784 Type *OldType = Arg->getType();
2785 Type *NewType = NewFn->getArg(I)->getType();
2786 Args.push_back(
2787 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2788 ? Builder.CreateBitCast(Arg, NewType)
2789 : Arg);
2790 }
2791 Rep = Builder.CreateCall(NewFn, Args);
2792 if (F->getReturnType()->isIntegerTy())
2793 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2794 }
2795 }
2796
2797 return Rep;
2798}
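// Example of the expansions above (illustrative IR): a call like
//   %r = call i32 @llvm.nvvm.rotate.b32(i32 %x, i32 %n)
// is emitted as the generic funnel shift
//   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)
// which codegen can later match back to a hardware rotate.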
2799
2800 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2801 IRBuilder<> &Builder) {
2802 LLVMContext &C = F->getContext();
2803 Value *Rep = nullptr;
2804
2805 if (Name.starts_with("sse4a.movnt.")) {
2807 Elts.push_back(
2808 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2809 MDNode *Node = MDNode::get(C, Elts);
2810
2811 Value *Arg0 = CI->getArgOperand(0);
2812 Value *Arg1 = CI->getArgOperand(1);
2813
2814 // Nontemporal (unaligned) store of the 0'th element of the float/double
2815 // vector.
2816 Value *Extract =
2817 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2818
2819 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2820 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2821 } else if (Name.starts_with("avx.movnt.") ||
2822 Name.starts_with("avx512.storent.")) {
2824 Elts.push_back(
2825 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2826 MDNode *Node = MDNode::get(C, Elts);
2827
2828 Value *Arg0 = CI->getArgOperand(0);
2829 Value *Arg1 = CI->getArgOperand(1);
2830
2831 StoreInst *SI = Builder.CreateAlignedStore(
2832 Arg1, Arg0,
2833 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2834 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2835 } else if (Name == "sse2.storel.dq") {
2836 Value *Arg0 = CI->getArgOperand(0);
2837 Value *Arg1 = CI->getArgOperand(1);
2838
2839 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2840 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2841 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2842 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2843 } else if (Name.starts_with("sse.storeu.") ||
2844 Name.starts_with("sse2.storeu.") ||
2845 Name.starts_with("avx.storeu.")) {
2846 Value *Arg0 = CI->getArgOperand(0);
2847 Value *Arg1 = CI->getArgOperand(1);
2848 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2849 } else if (Name == "avx512.mask.store.ss") {
2850 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2851 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2852 Mask, false);
2853 } else if (Name.starts_with("avx512.mask.store")) {
2854 // "avx512.mask.storeu." or "avx512.mask.store."
2855 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2856 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2857 CI->getArgOperand(2), Aligned);
2858 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2859 // Upgrade packed integer vector compare intrinsics to compare instructions.
2860 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2861 bool CmpEq = Name[9] == 'e';
2862 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2863 CI->getArgOperand(0), CI->getArgOperand(1));
2864 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2865 } else if (Name.starts_with("avx512.broadcastm")) {
2866 Type *ExtTy = Type::getInt32Ty(C);
2867 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2868 ExtTy = Type::getInt64Ty(C);
2869 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2870 ExtTy->getPrimitiveSizeInBits();
2871 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2872 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2873 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2874 Value *Vec = CI->getArgOperand(0);
2875 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2876 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2877 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2878 } else if (Name.starts_with("avx.sqrt.p") ||
2879 Name.starts_with("sse2.sqrt.p") ||
2880 Name.starts_with("sse.sqrt.p")) {
2881 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2882 {CI->getArgOperand(0)});
2883 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2884 if (CI->arg_size() == 4 &&
2885 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2886 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2887 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2888 : Intrinsic::x86_avx512_sqrt_pd_512;
2889
2890 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2891 Rep = Builder.CreateIntrinsic(IID, Args);
2892 } else {
2893 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2894 {CI->getArgOperand(0)});
2895 }
2896 Rep =
2897 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2898 } else if (Name.starts_with("avx512.ptestm") ||
2899 Name.starts_with("avx512.ptestnm")) {
2900 Value *Op0 = CI->getArgOperand(0);
2901 Value *Op1 = CI->getArgOperand(1);
2902 Value *Mask = CI->getArgOperand(2);
2903 Rep = Builder.CreateAnd(Op0, Op1);
2904 llvm::Type *Ty = Op0->getType();
2905 Value *Zero = llvm::Constant::getNullValue(Ty);
2906 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2909 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2910 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2911 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2912 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2913 ->getNumElements();
2914 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2915 Rep =
2916 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2917 } else if (Name.starts_with("avx512.kunpck")) {
2918 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2919 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2920 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2921 int Indices[64];
2922 for (unsigned i = 0; i != NumElts; ++i)
2923 Indices[i] = i;
2924
2925 // First extract half of each vector. This gives better codegen than
2926 // doing it in a single shuffle.
2927 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2928 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2929 // Concat the vectors.
2930 // NOTE: Operands have to be swapped to match intrinsic definition.
2931 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2932 Rep = Builder.CreateBitCast(Rep, CI->getType());
2933 } else if (Name == "avx512.kand.w") {
2934 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2935 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2936 Rep = Builder.CreateAnd(LHS, RHS);
2937 Rep = Builder.CreateBitCast(Rep, CI->getType());
2938 } else if (Name == "avx512.kandn.w") {
2939 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2940 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2941 LHS = Builder.CreateNot(LHS);
2942 Rep = Builder.CreateAnd(LHS, RHS);
2943 Rep = Builder.CreateBitCast(Rep, CI->getType());
2944 } else if (Name == "avx512.kor.w") {
2945 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2946 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2947 Rep = Builder.CreateOr(LHS, RHS);
2948 Rep = Builder.CreateBitCast(Rep, CI->getType());
2949 } else if (Name == "avx512.kxor.w") {
2950 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2951 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2952 Rep = Builder.CreateXor(LHS, RHS);
2953 Rep = Builder.CreateBitCast(Rep, CI->getType());
2954 } else if (Name == "avx512.kxnor.w") {
2955 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2956 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2957 LHS = Builder.CreateNot(LHS);
2958 Rep = Builder.CreateXor(LHS, RHS);
2959 Rep = Builder.CreateBitCast(Rep, CI->getType());
2960 } else if (Name == "avx512.knot.w") {
2961 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2962 Rep = Builder.CreateNot(Rep);
2963 Rep = Builder.CreateBitCast(Rep, CI->getType());
2964 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2965 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2966 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2967 Rep = Builder.CreateOr(LHS, RHS);
2968 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2969 Value *C;
2970 if (Name[14] == 'c')
2971 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2972 else
2973 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2974 Rep = Builder.CreateICmpEQ(Rep, C);
2975 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2976 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2977 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2978 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2979 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2980 Type *I32Ty = Type::getInt32Ty(C);
2981 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2982 ConstantInt::get(I32Ty, 0));
2983 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2984 ConstantInt::get(I32Ty, 0));
2985 Value *EltOp;
2986 if (Name.contains(".add."))
2987 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2988 else if (Name.contains(".sub."))
2989 EltOp = Builder.CreateFSub(Elt0, Elt1);
2990 else if (Name.contains(".mul."))
2991 EltOp = Builder.CreateFMul(Elt0, Elt1);
2992 else
2993 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2994 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2995 ConstantInt::get(I32Ty, 0));
2996 } else if (Name.starts_with("avx512.mask.pcmp")) {
2997 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2998 bool CmpEq = Name[16] == 'e';
2999 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3000 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3001 Type *OpTy = CI->getArgOperand(0)->getType();
3002 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3003 Intrinsic::ID IID;
3004 switch (VecWidth) {
3005 default:
3006 llvm_unreachable("Unexpected intrinsic");
3007 case 128:
3008 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3009 break;
3010 case 256:
3011 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3012 break;
3013 case 512:
3014 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3015 break;
3016 }
3017
3018 Rep =
3019 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3020 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3021 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3022 Type *OpTy = CI->getArgOperand(0)->getType();
3023 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3024 unsigned EltWidth = OpTy->getScalarSizeInBits();
3025 Intrinsic::ID IID;
3026 if (VecWidth == 128 && EltWidth == 32)
3027 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3028 else if (VecWidth == 256 && EltWidth == 32)
3029 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3030 else if (VecWidth == 512 && EltWidth == 32)
3031 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3032 else if (VecWidth == 128 && EltWidth == 64)
3033 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3034 else if (VecWidth == 256 && EltWidth == 64)
3035 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3036 else if (VecWidth == 512 && EltWidth == 64)
3037 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3038 else
3039 llvm_unreachable("Unexpected intrinsic");
3040
3041 Rep =
3042 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3043 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3044 } else if (Name.starts_with("avx512.cmp.p")) {
3045 SmallVector<Value *, 4> Args(CI->args());
3046 Type *OpTy = Args[0]->getType();
3047 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3048 unsigned EltWidth = OpTy->getScalarSizeInBits();
3049 Intrinsic::ID IID;
3050 if (VecWidth == 128 && EltWidth == 32)
3051 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3052 else if (VecWidth == 256 && EltWidth == 32)
3053 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3054 else if (VecWidth == 512 && EltWidth == 32)
3055 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3056 else if (VecWidth == 128 && EltWidth == 64)
3057 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3058 else if (VecWidth == 256 && EltWidth == 64)
3059 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3060 else if (VecWidth == 512 && EltWidth == 64)
3061 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3062 else
3063 llvm_unreachable("Unexpected intrinsic");
3064
3066 if (VecWidth == 512)
3067 std::swap(Mask, Args.back());
3068 Args.push_back(Mask);
3069
3070 Rep = Builder.CreateIntrinsic(IID, Args);
3071 } else if (Name.starts_with("avx512.mask.cmp.")) {
3072 // Integer compare intrinsics.
3073 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3074 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3075 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3076 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3077 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3078 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3079 Name.starts_with("avx512.cvtw2mask.") ||
3080 Name.starts_with("avx512.cvtd2mask.") ||
3081 Name.starts_with("avx512.cvtq2mask.")) {
3082 Value *Op = CI->getArgOperand(0);
3083 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3084 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3085 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3086 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3087 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3088 Name.starts_with("avx512.mask.pabs")) {
3089 Rep = upgradeAbs(Builder, *CI);
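// upgradeAbs (defined earlier in this file) maps these onto the generic
// llvm.abs intrinsic; e.g. ssse3.pabs.b.128 becomes, roughly,
//   %res = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %x, i1 false)
// with the usual mask select appended for the avx512.mask.* forms.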
3090 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3091 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3092 Name.starts_with("avx512.mask.pmaxs")) {
3093 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3094 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3095 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3096 Name.starts_with("avx512.mask.pmaxu")) {
3097 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3098 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3099 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3100 Name.starts_with("avx512.mask.pmins")) {
3101 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3102 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3103 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3104 Name.starts_with("avx512.mask.pminu")) {
3105 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3106 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3107 Name == "avx512.pmulu.dq.512" ||
3108 Name.starts_with("avx512.mask.pmulu.dq.")) {
3109 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3110 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3111 Name == "avx512.pmul.dq.512" ||
3112 Name.starts_with("avx512.mask.pmul.dq.")) {
3113 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3114 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3115 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3116 Rep =
3117 Builder.CreateSIToFP(CI->getArgOperand(1),
3118 cast<VectorType>(CI->getType())->getElementType());
3119 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
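// For example, @llvm.x86.sse.cvtsi2ss(<4 x float> %v, i32 %i) becomes:
//   %f = sitofp i32 %i to float
//   %res = insertelement <4 x float> %v, float %f, i64 0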
3120 } else if (Name == "avx512.cvtusi2sd") {
3121 Rep =
3122 Builder.CreateUIToFP(CI->getArgOperand(1),
3123 cast<VectorType>(CI->getType())->getElementType());
3124 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3125 } else if (Name == "sse2.cvtss2sd") {
3126 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3127 Rep = Builder.CreateFPExt(
3128 Rep, cast<VectorType>(CI->getType())->getElementType());
3129 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3130 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3131 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3132 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3133 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3134 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3135 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3136 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3137 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3138 Name == "avx512.mask.cvtqq2ps.256" ||
3139 Name == "avx512.mask.cvtqq2ps.512" ||
3140 Name == "avx512.mask.cvtuqq2ps.256" ||
3141 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3142 Name == "avx.cvt.ps2.pd.256" ||
3143 Name == "avx512.mask.cvtps2pd.128" ||
3144 Name == "avx512.mask.cvtps2pd.256") {
3145 auto *DstTy = cast<FixedVectorType>(CI->getType());
3146 Rep = CI->getArgOperand(0);
3147 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3148
3149 unsigned NumDstElts = DstTy->getNumElements();
3150 if (NumDstElts < SrcTy->getNumElements()) {
3151 assert(NumDstElts == 2 && "Unexpected vector size");
3152 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3153 }
3154
3155 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3156 bool IsUnsigned = Name.contains("cvtu");
3157 if (IsPS2PD)
3158 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3159 else if (CI->arg_size() == 4 &&
3160 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3161 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3162 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3163 : Intrinsic::x86_avx512_sitofp_round;
3164 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3165 {Rep, CI->getArgOperand(3)});
3166 } else {
3167 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3168 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3169 }
3170
3171 if (CI->arg_size() >= 3)
3172 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3173 CI->getArgOperand(1));
3174 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3175 Name.starts_with("vcvtph2ps.")) {
3176 auto *DstTy = cast<FixedVectorType>(CI->getType());
3177 Rep = CI->getArgOperand(0);
3178 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3179 unsigned NumDstElts = DstTy->getNumElements();
3180 if (NumDstElts != SrcTy->getNumElements()) {
3181 assert(NumDstElts == 4 && "Unexpected vector size");
3182 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3183 }
3184 Rep = Builder.CreateBitCast(
3185 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3186 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3187 if (CI->arg_size() >= 3)
3188 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3189 CI->getArgOperand(1));
3190 } else if (Name.starts_with("avx512.mask.load")) {
3191 // "avx512.mask.loadu." or "avx512.mask.load."
3192 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3193 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3194 CI->getArgOperand(2), Aligned);
3195 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3196 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3197 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3198 ResultTy->getNumElements());
3199
3200 Rep = Builder.CreateIntrinsic(
3201 Intrinsic::masked_expandload, ResultTy,
3202 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3203 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3204 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3205 Value *MaskVec =
3206 getX86MaskVec(Builder, CI->getArgOperand(2),
3207 cast<FixedVectorType>(ResultTy)->getNumElements());
3208
3209 Rep = Builder.CreateIntrinsic(
3210 Intrinsic::masked_compressstore, ResultTy,
3211 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3212 } else if (Name.starts_with("avx512.mask.compress.") ||
3213 Name.starts_with("avx512.mask.expand.")) {
3214 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3215
3216 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3217 ResultTy->getNumElements());
3218
3219 bool IsCompress = Name[12] == 'c';
3220 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3221 : Intrinsic::x86_avx512_mask_expand;
3222 Rep = Builder.CreateIntrinsic(
3223 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3224 } else if (Name.starts_with("xop.vpcom")) {
3225 bool IsSigned;
3226 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3227 Name.ends_with("uq"))
3228 IsSigned = false;
3229 else if (Name.ends_with("b") || Name.ends_with("w") ||
3230 Name.ends_with("d") || Name.ends_with("q"))
3231 IsSigned = true;
3232 else
3233 llvm_unreachable("Unknown suffix");
3234
3235 unsigned Imm;
3236 if (CI->arg_size() == 3) {
3237 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3238 } else {
3239 Name = Name.substr(9); // strip off "xop.vpcom"
3240 if (Name.starts_with("lt"))
3241 Imm = 0;
3242 else if (Name.starts_with("le"))
3243 Imm = 1;
3244 else if (Name.starts_with("gt"))
3245 Imm = 2;
3246 else if (Name.starts_with("ge"))
3247 Imm = 3;
3248 else if (Name.starts_with("eq"))
3249 Imm = 4;
3250 else if (Name.starts_with("ne"))
3251 Imm = 5;
3252 else if (Name.starts_with("false"))
3253 Imm = 6;
3254 else if (Name.starts_with("true"))
3255 Imm = 7;
3256 else
3257 llvm_unreachable("Unknown condition");
3258 }
3259
3260 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3261 } else if (Name.starts_with("xop.vpcmov")) {
3262 Value *Sel = CI->getArgOperand(2);
3263 Value *NotSel = Builder.CreateNot(Sel);
3264 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3265 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3266 Rep = Builder.CreateOr(Sel0, Sel1);
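// xop.vpcmov is a bitwise (not element-wise) select, so the expansion
// above is, for the 128-bit form:
//   %notsel = xor <2 x i64> %sel, <i64 -1, i64 -1>
//   %t0 = and <2 x i64> %a, %sel
//   %t1 = and <2 x i64> %b, %notsel
//   %res = or <2 x i64> %t0, %t1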
3267 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3268 Name.starts_with("avx512.mask.prol")) {
3269 Rep = upgradeX86Rotate(Builder, *CI, false);
3270 } else if (Name.starts_with("avx512.pror") ||
3271 Name.starts_with("avx512.mask.pror")) {
3272 Rep = upgradeX86Rotate(Builder, *CI, true);
3273 } else if (Name.starts_with("avx512.vpshld.") ||
3274 Name.starts_with("avx512.mask.vpshld") ||
3275 Name.starts_with("avx512.maskz.vpshld")) {
3276 bool ZeroMask = Name[11] == 'z';
3277 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3278 } else if (Name.starts_with("avx512.vpshrd.") ||
3279 Name.starts_with("avx512.mask.vpshrd") ||
3280 Name.starts_with("avx512.maskz.vpshrd")) {
3281 bool ZeroMask = Name[11] == 'z';
3282 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3283 } else if (Name == "sse42.crc32.64.8") {
3284 Value *Trunc0 =
3285 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3286 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3287 {Trunc0, CI->getArgOperand(1)});
3288 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3289 } else if (Name.starts_with("avx.vbroadcast.s") ||
3290 Name.starts_with("avx512.vbroadcast.s")) {
3291 // Replace broadcasts with a series of insertelements.
3292 auto *VecTy = cast<FixedVectorType>(CI->getType());
3293 Type *EltTy = VecTy->getElementType();
3294 unsigned EltNum = VecTy->getNumElements();
3295 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3296 Type *I32Ty = Type::getInt32Ty(C);
3297 Rep = PoisonValue::get(VecTy);
3298 for (unsigned I = 0; I < EltNum; ++I)
3299 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
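// For example, @llvm.x86.avx.vbroadcast.ss.256(ptr %p) becomes a scalar
// load whose result is inserted into all eight lanes:
//   %ld = load float, ptr %p
//   %v0 = insertelement <8 x float> poison, float %ld, i32 0
//   ...
//   %res = insertelement <8 x float> %v6, float %ld, i32 7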
3300 } else if (Name.starts_with("sse41.pmovsx") ||
3301 Name.starts_with("sse41.pmovzx") ||
3302 Name.starts_with("avx2.pmovsx") ||
3303 Name.starts_with("avx2.pmovzx") ||
3304 Name.starts_with("avx512.mask.pmovsx") ||
3305 Name.starts_with("avx512.mask.pmovzx")) {
3306 auto *DstTy = cast<FixedVectorType>(CI->getType());
3307 unsigned NumDstElts = DstTy->getNumElements();
3308
3309 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3310 SmallVector<int, 8> ShuffleMask(NumDstElts);
3311 for (unsigned i = 0; i != NumDstElts; ++i)
3312 ShuffleMask[i] = i;
3313
3314 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3315
3316 bool DoSext = Name.contains("pmovsx");
3317 Rep =
3318 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3319 // If there are 3 arguments, it's a masked intrinsic, so we need a select.
3320 if (CI->arg_size() == 3)
3321 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3322 CI->getArgOperand(1));
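// For example, @llvm.x86.sse41.pmovsxbd(<16 x i8> %x) becomes:
//   %sub = shufflevector <16 x i8> %x, <16 x i8> poison,
//                        <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %res = sext <4 x i8> %sub to <4 x i32>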
3323 } else if (Name == "avx512.mask.pmov.qd.256" ||
3324 Name == "avx512.mask.pmov.qd.512" ||
3325 Name == "avx512.mask.pmov.wb.256" ||
3326 Name == "avx512.mask.pmov.wb.512") {
3327 Type *Ty = CI->getArgOperand(1)->getType();
3328 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3329 Rep =
3330 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3331 } else if (Name.starts_with("avx.vbroadcastf128") ||
3332 Name == "avx2.vbroadcasti128") {
3333 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3334 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3335 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3336 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3337 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3338 if (NumSrcElts == 2)
3339 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3340 else
3341 Rep = Builder.CreateShuffleVector(Load,
3342 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
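// For example, @llvm.x86.avx.vbroadcastf128.ps.256(ptr %p) becomes:
//   %ld = load <4 x float>, ptr %p, align 1
//   %res = shufflevector <4 x float> %ld, <4 x float> poison,
//            <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>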
3343 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3344 Name.starts_with("avx512.mask.shuf.f")) {
3345 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3346 Type *VT = CI->getType();
3347 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3348 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3349 unsigned ControlBitsMask = NumLanes - 1;
3350 unsigned NumControlBits = NumLanes / 2;
3351 SmallVector<int, 8> ShuffleMask(0);
3352
3353 for (unsigned l = 0; l != NumLanes; ++l) {
3354 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3355 // We actually need the other source.
3356 if (l >= NumLanes / 2)
3357 LaneMask += NumLanes;
3358 for (unsigned i = 0; i != NumElementsInLane; ++i)
3359 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3360 }
3361 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3362 CI->getArgOperand(1), ShuffleMask);
3363 Rep =
3364 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3365 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3366 Name.starts_with("avx512.mask.broadcasti")) {
3367 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3368 ->getNumElements();
3369 unsigned NumDstElts =
3370 cast<FixedVectorType>(CI->getType())->getNumElements();
3371
3372 SmallVector<int, 8> ShuffleMask(NumDstElts);
3373 for (unsigned i = 0; i != NumDstElts; ++i)
3374 ShuffleMask[i] = i % NumSrcElts;
3375
3376 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3377 CI->getArgOperand(0), ShuffleMask);
3378 Rep =
3379 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3380 } else if (Name.starts_with("avx2.pbroadcast") ||
3381 Name.starts_with("avx2.vbroadcast") ||
3382 Name.starts_with("avx512.pbroadcast") ||
3383 Name.starts_with("avx512.mask.broadcast.s")) {
3384 // Replace vp?broadcasts with a vector shuffle.
3385 Value *Op = CI->getArgOperand(0);
3386 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3387 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3388 SmallVector<int, 8> M;
3389 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3390 Rep = Builder.CreateShuffleVector(Op, M);
3391
3392 if (CI->arg_size() == 3)
3393 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3394 CI->getArgOperand(1));
3395 } else if (Name.starts_with("sse2.padds.") ||
3396 Name.starts_with("avx2.padds.") ||
3397 Name.starts_with("avx512.padds.") ||
3398 Name.starts_with("avx512.mask.padds.")) {
3399 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3400 } else if (Name.starts_with("sse2.psubs.") ||
3401 Name.starts_with("avx2.psubs.") ||
3402 Name.starts_with("avx512.psubs.") ||
3403 Name.starts_with("avx512.mask.psubs.")) {
3404 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3405 } else if (Name.starts_with("sse2.paddus.") ||
3406 Name.starts_with("avx2.paddus.") ||
3407 Name.starts_with("avx512.mask.paddus.")) {
3408 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3409 } else if (Name.starts_with("sse2.psubus.") ||
3410 Name.starts_with("avx2.psubus.") ||
3411 Name.starts_with("avx512.mask.psubus.")) {
3412 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3413 } else if (Name.starts_with("avx512.mask.palignr.")) {
3414 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3415 CI->getArgOperand(1), CI->getArgOperand(2),
3416 CI->getArgOperand(3), CI->getArgOperand(4),
3417 false);
3418 } else if (Name.starts_with("avx512.mask.valign.")) {
3419 Rep = upgradeX86ALIGNIntrinsics(
3420 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3421 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3422 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3423 // 128/256-bit shift left specified in bits.
3424 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3425 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3426 Shift / 8); // Shift is in bits.
3427 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3428 // 128/256-bit shift right specified in bits.
3429 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3430 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3431 Shift / 8); // Shift is in bits.
3432 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3433 Name == "avx512.psll.dq.512") {
3434 // 128/256/512-bit shift left specified in bytes.
3435 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3436 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3437 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3438 Name == "avx512.psrl.dq.512") {
3439 // 128/256/512-bit shift right specified in bytes.
3440 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3441 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3442 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3443 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3444 Name.starts_with("avx2.pblendd.")) {
3445 Value *Op0 = CI->getArgOperand(0);
3446 Value *Op1 = CI->getArgOperand(1);
3447 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3448 auto *VecTy = cast<FixedVectorType>(CI->getType());
3449 unsigned NumElts = VecTy->getNumElements();
3450
3451 SmallVector<int, 16> Idxs(NumElts);
3452 for (unsigned i = 0; i != NumElts; ++i)
3453 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3454
3455 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
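// Worked example: sse41.pblendw with Imm = 0x0f on <8 x i16> takes
// elements 0-3 from Op1 (a set bit selects the second source) and
// elements 4-7 from Op0, i.e. the shuffle mask <8, 9, 10, 11, 4, 5, 6, 7>.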
3456 } else if (Name.starts_with("avx.vinsertf128.") ||
3457 Name == "avx2.vinserti128" ||
3458 Name.starts_with("avx512.mask.insert")) {
3459 Value *Op0 = CI->getArgOperand(0);
3460 Value *Op1 = CI->getArgOperand(1);
3461 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3462 unsigned DstNumElts =
3463 cast<FixedVectorType>(CI->getType())->getNumElements();
3464 unsigned SrcNumElts =
3465 cast<FixedVectorType>(Op1->getType())->getNumElements();
3466 unsigned Scale = DstNumElts / SrcNumElts;
3467
3468 // Mask off the high bits of the immediate value; hardware ignores those.
3469 Imm = Imm % Scale;
3470
3471 // Extend the second operand into a vector the size of the destination.
3472 SmallVector<int, 8> Idxs(DstNumElts);
3473 for (unsigned i = 0; i != SrcNumElts; ++i)
3474 Idxs[i] = i;
3475 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3476 Idxs[i] = SrcNumElts;
3477 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3478
3479 // Insert the second operand into the first operand.
3480
3481 // Note that there is no guarantee that instruction lowering will actually
3482 // produce a vinsertf128 instruction for the created shuffles. In
3483 // particular, the 0 immediate case involves no lane changes, so it can
3484 // be handled as a blend.
3485
3486 // Example of shuffle mask for 32-bit elements:
3487 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3488 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
3489
3490 // First fill with the identity mask.
3491 for (unsigned i = 0; i != DstNumElts; ++i)
3492 Idxs[i] = i;
3493 // Then replace the elements where we need to insert.
3494 for (unsigned i = 0; i != SrcNumElts; ++i)
3495 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3496 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3497
3498 // If the intrinsic has a mask operand, handle that.
3499 if (CI->arg_size() == 5)
3500 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3501 CI->getArgOperand(3));
3502 } else if (Name.starts_with("avx.vextractf128.") ||
3503 Name == "avx2.vextracti128" ||
3504 Name.starts_with("avx512.mask.vextract")) {
3505 Value *Op0 = CI->getArgOperand(0);
3506 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3507 unsigned DstNumElts =
3508 cast<FixedVectorType>(CI->getType())->getNumElements();
3509 unsigned SrcNumElts =
3510 cast<FixedVectorType>(Op0->getType())->getNumElements();
3511 unsigned Scale = SrcNumElts / DstNumElts;
3512
3513 // Mask off the high bits of the immediate value; hardware ignores those.
3514 Imm = Imm % Scale;
3515
3516 // Get indexes for the subvector of the input vector.
3517 SmallVector<int, 8> Idxs(DstNumElts);
3518 for (unsigned i = 0; i != DstNumElts; ++i) {
3519 Idxs[i] = i + (Imm * DstNumElts);
3520 }
3521 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3522
3523 // If the intrinsic has a mask operand, handle that.
3524 if (CI->arg_size() == 4)
3525 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3526 CI->getArgOperand(2));
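// Worked example: avx.vextractf128 with Imm = 1 on an <8 x float> source
// extracts the high 128 bits via the shuffle mask <4, 5, 6, 7>.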
3527 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3528 Name.starts_with("avx512.mask.perm.di.")) {
3529 Value *Op0 = CI->getArgOperand(0);
3530 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3531 auto *VecTy = cast<FixedVectorType>(CI->getType());
3532 unsigned NumElts = VecTy->getNumElements();
3533
3534 SmallVector<int, 8> Idxs(NumElts);
3535 for (unsigned i = 0; i != NumElts; ++i)
3536 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3537
3538 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3539
3540 if (CI->arg_size() == 4)
3541 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3542 CI->getArgOperand(2));
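// Worked example: Imm = 0x1b encodes per-group indices 3, 2, 1, 0, so an
// 8-element permute produces the shuffle mask <3, 2, 1, 0, 7, 6, 5, 4>.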
3543 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3544 // The immediate permute control byte looks like this:
3545 // [1:0] - select 128 bits from sources for low half of destination
3546 // [2] - ignore
3547 // [3] - zero low half of destination
3548 // [5:4] - select 128 bits from sources for high half of destination
3549 // [6] - ignore
3550 // [7] - zero high half of destination
3551
3552 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3553
3554 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3555 unsigned HalfSize = NumElts / 2;
3556 SmallVector<int, 8> ShuffleMask(NumElts);
3557
3558 // Determine which operand(s) are actually in use for this instruction.
3559 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3560 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3561
3562 // If needed, replace operands based on zero mask.
3563 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3564 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3565
3566 // Permute low half of result.
3567 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3568 for (unsigned i = 0; i < HalfSize; ++i)
3569 ShuffleMask[i] = StartIndex + i;
3570
3571 // Permute high half of result.
3572 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3573 for (unsigned i = 0; i < HalfSize; ++i)
3574 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3575
3576 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3577
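// Worked example: avx.vperm2f128 with Imm = 0x21 on <8 x float> operands
// selects the high half of the first source and the low half of the
// second, i.e. the shuffle mask <4, 5, 6, 7, 8, 9, 10, 11>.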
3578 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3579 Name.starts_with("avx512.mask.vpermil.p") ||
3580 Name.starts_with("avx512.mask.pshuf.d.")) {
3581 Value *Op0 = CI->getArgOperand(0);
3582 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3583 auto *VecTy = cast<FixedVectorType>(CI->getType());
3584 unsigned NumElts = VecTy->getNumElements();
3585 // Calculate the size of each index in the immediate.
3586 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3587 unsigned IdxMask = ((1 << IdxSize) - 1);
3588
3589 SmallVector<int, 8> Idxs(NumElts);
3590 // Look up the bits for this element, wrapping around the immediate every
3591 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3592 // to offset by the first index of each group.
3593 for (unsigned i = 0; i != NumElts; ++i)
3594 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3595
3596 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3597
3598 if (CI->arg_size() == 4)
3599 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3600 CI->getArgOperand(2));
3601 } else if (Name == "sse2.pshufl.w" ||
3602 Name.starts_with("avx512.mask.pshufl.w.")) {
3603 Value *Op0 = CI->getArgOperand(0);
3604 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3605 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3606
3607 SmallVector<int, 16> Idxs(NumElts);
3608 for (unsigned l = 0; l != NumElts; l += 8) {
3609 for (unsigned i = 0; i != 4; ++i)
3610 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3611 for (unsigned i = 4; i != 8; ++i)
3612 Idxs[i + l] = i + l;
3613 }
3614
3615 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3616
3617 if (CI->arg_size() == 4)
3618 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3619 CI->getArgOperand(2));
3620 } else if (Name == "sse2.pshufh.w" ||
3621 Name.starts_with("avx512.mask.pshufh.w.")) {
3622 Value *Op0 = CI->getArgOperand(0);
3623 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3624 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3625
3626 SmallVector<int, 16> Idxs(NumElts);
3627 for (unsigned l = 0; l != NumElts; l += 8) {
3628 for (unsigned i = 0; i != 4; ++i)
3629 Idxs[i + l] = i + l;
3630 for (unsigned i = 0; i != 4; ++i)
3631 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3632 }
3633
3634 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3635
3636 if (CI->arg_size() == 4)
3637 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3638 CI->getArgOperand(2));
3639 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3640 Value *Op0 = CI->getArgOperand(0);
3641 Value *Op1 = CI->getArgOperand(1);
3642 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3643 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3644
3645 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3646 unsigned HalfLaneElts = NumLaneElts / 2;
3647
3648 SmallVector<int, 16> Idxs(NumElts);
3649 for (unsigned i = 0; i != NumElts; ++i) {
3650 // Base index is the starting element of the lane.
3651 Idxs[i] = i - (i % NumLaneElts);
3652 // If we are halfway through the lane, switch to the other source.
3653 if ((i % NumLaneElts) >= HalfLaneElts)
3654 Idxs[i] += NumElts;
3655 // Now select the specific element by adding HalfLaneElts bits from
3656 // the immediate, wrapping around the immediate every 8 bits.
3657 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3658 }
3659
3660 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3661
3662 Rep =
3663 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3664 } else if (Name.starts_with("avx512.mask.movddup") ||
3665 Name.starts_with("avx512.mask.movshdup") ||
3666 Name.starts_with("avx512.mask.movsldup")) {
3667 Value *Op0 = CI->getArgOperand(0);
3668 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3669 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3670
3671 unsigned Offset = 0;
3672 if (Name.starts_with("avx512.mask.movshdup."))
3673 Offset = 1;
3674
3675 SmallVector<int, 16> Idxs(NumElts);
3676 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3677 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3678 Idxs[i + l + 0] = i + l + Offset;
3679 Idxs[i + l + 1] = i + l + Offset;
3680 }
3681
3682 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3683
3684 Rep =
3685 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
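// The per-lane masks match the hardware semantics; e.g. on <4 x float>,
// movsldup yields <0, 0, 2, 2>, movshdup yields <1, 1, 3, 3>, and movddup
// on <2 x double> yields <0, 0>.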
3686 } else if (Name.starts_with("avx512.mask.punpckl") ||
3687 Name.starts_with("avx512.mask.unpckl.")) {
3688 Value *Op0 = CI->getArgOperand(0);
3689 Value *Op1 = CI->getArgOperand(1);
3690 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3691 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3692
3693 SmallVector<int, 64> Idxs(NumElts);
3694 for (int l = 0; l != NumElts; l += NumLaneElts)
3695 for (int i = 0; i != NumLaneElts; ++i)
3696 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3697
3698 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3699
3700 Rep =
3701 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3702 } else if (Name.starts_with("avx512.mask.punpckh") ||
3703 Name.starts_with("avx512.mask.unpckh.")) {
3704 Value *Op0 = CI->getArgOperand(0);
3705 Value *Op1 = CI->getArgOperand(1);
3706 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3707 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3708
3709 SmallVector<int, 64> Idxs(NumElts);
3710 for (int l = 0; l != NumElts; l += NumLaneElts)
3711 for (int i = 0; i != NumLaneElts; ++i)
3712 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3713
3714 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3715
3716 Rep =
3717 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
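// Worked example on <4 x i32> (a single 128-bit lane): the low-unpack
// mask is <0, 4, 1, 5> and the high-unpack mask is <2, 6, 3, 7>,
// interleaving the two sources exactly like punpckldq/punpckhdq.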
3718 } else if (Name.starts_with("avx512.mask.and.") ||
3719 Name.starts_with("avx512.mask.pand.")) {
3720 VectorType *FTy = cast<VectorType>(CI->getType());
3721 VectorType *ITy = VectorType::getInteger(FTy);
3722 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3723 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3724 Rep = Builder.CreateBitCast(Rep, FTy);
3725 Rep =
3726 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3727 } else if (Name.starts_with("avx512.mask.andn.") ||
3728 Name.starts_with("avx512.mask.pandn.")) {
3729 VectorType *FTy = cast<VectorType>(CI->getType());
3730 VectorType *ITy = VectorType::getInteger(FTy);
3731 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3732 Rep = Builder.CreateAnd(Rep,
3733 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3734 Rep = Builder.CreateBitCast(Rep, FTy);
3735 Rep =
3736 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3737 } else if (Name.starts_with("avx512.mask.or.") ||
3738 Name.starts_with("avx512.mask.por.")) {
3739 VectorType *FTy = cast<VectorType>(CI->getType());
3740 VectorType *ITy = VectorType::getInteger(FTy);
3741 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3742 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3743 Rep = Builder.CreateBitCast(Rep, FTy);
3744 Rep =
3745 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3746 } else if (Name.starts_with("avx512.mask.xor.") ||
3747 Name.starts_with("avx512.mask.pxor.")) {
3748 VectorType *FTy = cast<VectorType>(CI->getType());
3749 VectorType *ITy = VectorType::getInteger(FTy);
3750 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3751 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3752 Rep = Builder.CreateBitCast(Rep, FTy);
3753 Rep =
3754 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3755 } else if (Name.starts_with("avx512.mask.padd.")) {
3756 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3757 Rep =
3758 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3759 } else if (Name.starts_with("avx512.mask.psub.")) {
3760 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3761 Rep =
3762 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3763 } else if (Name.starts_with("avx512.mask.pmull.")) {
3764 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3765 Rep =
3766 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3767 } else if (Name.starts_with("avx512.mask.add.p")) {
3768 if (Name.ends_with(".512")) {
3769 Intrinsic::ID IID;
3770 if (Name[17] == 's')
3771 IID = Intrinsic::x86_avx512_add_ps_512;
3772 else
3773 IID = Intrinsic::x86_avx512_add_pd_512;
3774
3775 Rep = Builder.CreateIntrinsic(
3776 IID,
3777 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3778 } else {
3779 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3780 }
3781 Rep =
3782 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3783 } else if (Name.starts_with("avx512.mask.div.p")) {
3784 if (Name.ends_with(".512")) {
3785 Intrinsic::ID IID;
3786 if (Name[17] == 's')
3787 IID = Intrinsic::x86_avx512_div_ps_512;
3788 else
3789 IID = Intrinsic::x86_avx512_div_pd_512;
3790
3791 Rep = Builder.CreateIntrinsic(
3792 IID,
3793 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3794 } else {
3795 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3796 }
3797 Rep =
3798 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3799 } else if (Name.starts_with("avx512.mask.mul.p")) {
3800 if (Name.ends_with(".512")) {
3801 Intrinsic::ID IID;
3802 if (Name[17] == 's')
3803 IID = Intrinsic::x86_avx512_mul_ps_512;
3804 else
3805 IID = Intrinsic::x86_avx512_mul_pd_512;
3806
3807 Rep = Builder.CreateIntrinsic(
3808 IID,
3809 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3810 } else {
3811 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3812 }
3813 Rep =
3814 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3815 } else if (Name.starts_with("avx512.mask.sub.p")) {
3816 if (Name.ends_with(".512")) {
3817 Intrinsic::ID IID;
3818 if (Name[17] == 's')
3819 IID = Intrinsic::x86_avx512_sub_ps_512;
3820 else
3821 IID = Intrinsic::x86_avx512_sub_pd_512;
3822
3823 Rep = Builder.CreateIntrinsic(
3824 IID,
3825 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3826 } else {
3827 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3828 }
3829 Rep =
3830 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3831 } else if ((Name.starts_with("avx512.mask.max.p") ||
3832 Name.starts_with("avx512.mask.min.p")) &&
3833 Name.drop_front(18) == ".512") {
3834 bool IsDouble = Name[17] == 'd';
3835 bool IsMin = Name[13] == 'i';
3836 static const Intrinsic::ID MinMaxTbl[2][2] = {
3837 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3838 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3839 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3840
3841 Rep = Builder.CreateIntrinsic(
3842 IID,
3843 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3844 Rep =
3845 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3846 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3847 Rep =
3848 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3849 {CI->getArgOperand(0), Builder.getInt1(false)});
3850 Rep =
3851 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
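// For example, avx512.mask.lzcnt.d.512 becomes:
//   %ct = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %x, i1 false)
// followed by the usual masked select against the passthru operand.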
3852 } else if (Name.starts_with("avx512.mask.psll")) {
3853 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3854 bool IsVariable = Name[16] == 'v';
3855 char Size = Name[16] == '.' ? Name[17]
3856 : Name[17] == '.' ? Name[18]
3857 : Name[18] == '.' ? Name[19]
3858 : Name[20];
3859
3860 Intrinsic::ID IID;
3861 if (IsVariable && Name[17] != '.') {
3862 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3863 IID = Intrinsic::x86_avx2_psllv_q;
3864 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3865 IID = Intrinsic::x86_avx2_psllv_q_256;
3866 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3867 IID = Intrinsic::x86_avx2_psllv_d;
3868 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3869 IID = Intrinsic::x86_avx2_psllv_d_256;
3870 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3871 IID = Intrinsic::x86_avx512_psllv_w_128;
3872 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3873 IID = Intrinsic::x86_avx512_psllv_w_256;
3874 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3875 IID = Intrinsic::x86_avx512_psllv_w_512;
3876 else
3877 llvm_unreachable("Unexpected size");
3878 } else if (Name.ends_with(".128")) {
3879 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3880 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3881 : Intrinsic::x86_sse2_psll_d;
3882 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3883 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3884 : Intrinsic::x86_sse2_psll_q;
3885 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3886 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3887 : Intrinsic::x86_sse2_psll_w;
3888 else
3889 llvm_unreachable("Unexpected size");
3890 } else if (Name.ends_with(".256")) {
3891 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3892 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3893 : Intrinsic::x86_avx2_psll_d;
3894 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3895 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3896 : Intrinsic::x86_avx2_psll_q;
3897 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3898 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3899 : Intrinsic::x86_avx2_psll_w;
3900 else
3901 llvm_unreachable("Unexpected size");
3902 } else {
3903 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3904 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3905 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3906 : Intrinsic::x86_avx512_psll_d_512;
3907 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3908 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3909 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3910 : Intrinsic::x86_avx512_psll_q_512;
3911 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3912 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3913 : Intrinsic::x86_avx512_psll_w_512;
3914 else
3915 llvm_unreachable("Unexpected size");
3916 }
3917
3918 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
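// Name-decoding sketch: for "avx512.mask.psll.d.128", Name[16] is '.',
// so Size = Name[17] = 'd' and the ".128" suffix selects
// Intrinsic::x86_sse2_psll_d; the immediate form "avx512.mask.psll.di.128"
// has Name[18] == 'i' and maps to Intrinsic::x86_sse2_pslli_d instead.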
3919 } else if (Name.starts_with("avx512.mask.psrl")) {
3920 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3921 bool IsVariable = Name[16] == 'v';
3922 char Size = Name[16] == '.' ? Name[17]
3923 : Name[17] == '.' ? Name[18]
3924 : Name[18] == '.' ? Name[19]
3925 : Name[20];
3926
3927 Intrinsic::ID IID;
3928 if (IsVariable && Name[17] != '.') {
3929 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3930 IID = Intrinsic::x86_avx2_psrlv_q;
3931 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3932 IID = Intrinsic::x86_avx2_psrlv_q_256;
3933 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3934 IID = Intrinsic::x86_avx2_psrlv_d;
3935 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3936 IID = Intrinsic::x86_avx2_psrlv_d_256;
3937 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3938 IID = Intrinsic::x86_avx512_psrlv_w_128;
3939 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3940 IID = Intrinsic::x86_avx512_psrlv_w_256;
3941 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3942 IID = Intrinsic::x86_avx512_psrlv_w_512;
3943 else
3944 llvm_unreachable("Unexpected size");
3945 } else if (Name.ends_with(".128")) {
3946 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3947 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3948 : Intrinsic::x86_sse2_psrl_d;
3949 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3950 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3951 : Intrinsic::x86_sse2_psrl_q;
3952 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3953 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3954 : Intrinsic::x86_sse2_psrl_w;
3955 else
3956 llvm_unreachable("Unexpected size");
3957 } else if (Name.ends_with(".256")) {
3958 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3959 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3960 : Intrinsic::x86_avx2_psrl_d;
3961 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3962 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3963 : Intrinsic::x86_avx2_psrl_q;
3964 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3965 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3966 : Intrinsic::x86_avx2_psrl_w;
3967 else
3968 llvm_unreachable("Unexpected size");
3969 } else {
3970 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3971 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3972 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3973 : Intrinsic::x86_avx512_psrl_d_512;
3974 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3975 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3976 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3977 : Intrinsic::x86_avx512_psrl_q_512;
3978 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3979 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3980 : Intrinsic::x86_avx512_psrl_w_512;
3981 else
3982 llvm_unreachable("Unexpected size");
3983 }
3984
3985 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3986 } else if (Name.starts_with("avx512.mask.psra")) {
3987 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3988 bool IsVariable = Name[16] == 'v';
3989 char Size = Name[16] == '.' ? Name[17]
3990 : Name[17] == '.' ? Name[18]
3991 : Name[18] == '.' ? Name[19]
3992 : Name[20];
3993
3994 Intrinsic::ID IID;
3995 if (IsVariable && Name[17] != '.') {
3996 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3997 IID = Intrinsic::x86_avx2_psrav_d;
3998 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3999 IID = Intrinsic::x86_avx2_psrav_d_256;
4000 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4001 IID = Intrinsic::x86_avx512_psrav_w_128;
4002 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4003 IID = Intrinsic::x86_avx512_psrav_w_256;
4004 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4005 IID = Intrinsic::x86_avx512_psrav_w_512;
4006 else
4007 llvm_unreachable("Unexpected size");
4008 } else if (Name.ends_with(".128")) {
4009 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4010 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4011 : Intrinsic::x86_sse2_psra_d;
4012 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4013 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4014 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4015 : Intrinsic::x86_avx512_psra_q_128;
4016 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4017 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4018 : Intrinsic::x86_sse2_psra_w;
4019 else
4020 llvm_unreachable("Unexpected size");
4021 } else if (Name.ends_with(".256")) {
4022 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4023 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4024 : Intrinsic::x86_avx2_psra_d;
4025 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4026 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4027 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4028 : Intrinsic::x86_avx512_psra_q_256;
4029 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4030 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4031 : Intrinsic::x86_avx2_psra_w;
4032 else
4033 llvm_unreachable("Unexpected size");
4034 } else {
4035 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4036 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4037 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4038 : Intrinsic::x86_avx512_psra_d_512;
4039 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4040 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4041 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4042 : Intrinsic::x86_avx512_psra_q_512;
4043 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4044 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4045 : Intrinsic::x86_avx512_psra_w_512;
4046 else
4047 llvm_unreachable("Unexpected size");
4048 }
4049
4050 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4051 } else if (Name.starts_with("avx512.mask.move.s")) {
4052 Rep = upgradeMaskedMove(Builder, *CI);
4053 } else if (Name.starts_with("avx512.cvtmask2")) {
4054 Rep = upgradeMaskToInt(Builder, *CI);
4055 } else if (Name.ends_with(".movntdqa")) {
4056 MDNode *Node = MDNode::get(
4057 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4058
4059 LoadInst *LI = Builder.CreateAlignedLoad(
4060 CI->getType(), CI->getArgOperand(0),
4061 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
4062 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4063 Rep = LI;
4064 } else if (Name.starts_with("fma.vfmadd.") ||
4065 Name.starts_with("fma.vfmsub.") ||
4066 Name.starts_with("fma.vfnmadd.") ||
4067 Name.starts_with("fma.vfnmsub.")) {
4068 bool NegMul = Name[6] == 'n';
4069 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4070 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4071
4072 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4073 CI->getArgOperand(2)};
4074
4075 if (IsScalar) {
4076 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4077 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4078 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4079 }
4080
4081 if (NegMul && !IsScalar)
4082 Ops[0] = Builder.CreateFNeg(Ops[0]);
4083 if (NegMul && IsScalar)
4084 Ops[1] = Builder.CreateFNeg(Ops[1]);
4085 if (NegAcc)
4086 Ops[2] = Builder.CreateFNeg(Ops[2]);
4087
4088 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4089
4090 if (IsScalar)
4091 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4092 } else if (Name.starts_with("fma4.vfmadd.s")) {
4093 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4094 CI->getArgOperand(2)};
4095
4096 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4097 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4098 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4099
4100 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4101
4102 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4103 Rep, (uint64_t)0);
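// For example, @llvm.x86.fma4.vfmadd.ss becomes, roughly:
//   %a0 = extractelement <4 x float> %a, i64 0   ; likewise %b0, %c0
//   %f = call float @llvm.fma.f32(float %a0, float %b0, float %c0)
//   %res = insertelement <4 x float> zeroinitializer, float %f, i64 0
// Unlike the FMA3 scalar forms above, FMA4 zeroes the upper elements.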
4104 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4105 Name.starts_with("avx512.maskz.vfmadd.s") ||
4106 Name.starts_with("avx512.mask3.vfmadd.s") ||
4107 Name.starts_with("avx512.mask3.vfmsub.s") ||
4108 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4109 bool IsMask3 = Name[11] == '3';
4110 bool IsMaskZ = Name[11] == 'z';
4111 // Drop the "avx512.mask." to make it easier.
4112 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4113 bool NegMul = Name[2] == 'n';
4114 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4115
4116 Value *A = CI->getArgOperand(0);
4117 Value *B = CI->getArgOperand(1);
4118 Value *C = CI->getArgOperand(2);
4119
4120 if (NegMul && (IsMask3 || IsMaskZ))
4121 A = Builder.CreateFNeg(A);
4122 if (NegMul && !(IsMask3 || IsMaskZ))
4123 B = Builder.CreateFNeg(B);
4124 if (NegAcc)
4125 C = Builder.CreateFNeg(C);
4126
4127 A = Builder.CreateExtractElement(A, (uint64_t)0);
4128 B = Builder.CreateExtractElement(B, (uint64_t)0);
4129 C = Builder.CreateExtractElement(C, (uint64_t)0);
4130
4131 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4132 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4133 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4134
4135 Intrinsic::ID IID;
4136 if (Name.back() == 'd')
4137 IID = Intrinsic::x86_avx512_vfmadd_f64;
4138 else
4139 IID = Intrinsic::x86_avx512_vfmadd_f32;
4140 Rep = Builder.CreateIntrinsic(IID, Ops);
4141 } else {
4142 Rep = Builder.CreateFMA(A, B, C);
4143 }
4144
4145 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4146 : IsMask3 ? C
4147 : A;
4148
4149 // For Mask3 with NegAcc, we need to create a new extractelement that
4150 // avoids the negation above.
4151 if (NegAcc && IsMask3)
4152 PassThru =
4153 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4154
4155 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4156 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4157 (uint64_t)0);
4158 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4159 Name.starts_with("avx512.mask.vfnmadd.p") ||
4160 Name.starts_with("avx512.mask.vfnmsub.p") ||
4161 Name.starts_with("avx512.mask3.vfmadd.p") ||
4162 Name.starts_with("avx512.mask3.vfmsub.p") ||
4163 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4164 Name.starts_with("avx512.maskz.vfmadd.p")) {
4165 bool IsMask3 = Name[11] == '3';
4166 bool IsMaskZ = Name[11] == 'z';
4167 // Drop the "avx512.mask." to make it easier.
4168 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4169 bool NegMul = Name[2] == 'n';
4170 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4171
4172 Value *A = CI->getArgOperand(0);
4173 Value *B = CI->getArgOperand(1);
4174 Value *C = CI->getArgOperand(2);
4175
4176 if (NegMul && (IsMask3 || IsMaskZ))
4177 A = Builder.CreateFNeg(A);
4178 if (NegMul && !(IsMask3 || IsMaskZ))
4179 B = Builder.CreateFNeg(B);
4180 if (NegAcc)
4181 C = Builder.CreateFNeg(C);
4182
4183 if (CI->arg_size() == 5 &&
4184 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4185 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4186 Intrinsic::ID IID;
4187 // Check the character before ".512" in the string.
4188 if (Name[Name.size() - 5] == 's')
4189 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4190 else
4191 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4192
4193 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4194 } else {
4195 Rep = Builder.CreateFMA(A, B, C);
4196 }
4197
4198 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4199 : IsMask3 ? CI->getArgOperand(2)
4200 : CI->getArgOperand(0);
4201
4202 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4203 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4204 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4205 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4206 Intrinsic::ID IID;
4207 if (VecWidth == 128 && EltWidth == 32)
4208 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4209 else if (VecWidth == 256 && EltWidth == 32)
4210 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4211 else if (VecWidth == 128 && EltWidth == 64)
4212 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4213 else if (VecWidth == 256 && EltWidth == 64)
4214 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4215 else
4216 llvm_unreachable("Unexpected intrinsic");
4217
4218 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4219 CI->getArgOperand(2)};
4220 Ops[2] = Builder.CreateFNeg(Ops[2]);
4221 Rep = Builder.CreateIntrinsic(IID, Ops);
4222 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4223 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4224 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4225 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4226 bool IsMask3 = Name[11] == '3';
4227 bool IsMaskZ = Name[11] == 'z';
4228 // Drop the "avx512.mask." to make it easier.
4229 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4230 bool IsSubAdd = Name[3] == 's';
4231 if (CI->arg_size() == 5) {
4232 Intrinsic::ID IID;
4233 // Check the character before ".512" in the string.
4234 if (Name[Name.size() - 5] == 's')
4235 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4236 else
4237 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4238
4239 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4240 CI->getArgOperand(2), CI->getArgOperand(4)};
4241 if (IsSubAdd)
4242 Ops[2] = Builder.CreateFNeg(Ops[2]);
4243
4244 Rep = Builder.CreateIntrinsic(IID, Ops);
4245 } else {
4246 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4247
4248 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4249 CI->getArgOperand(2)};
4250
4251 Function *FMA = Intrinsic::getOrInsertDeclaration(
4252 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4253 Value *Odd = Builder.CreateCall(FMA, Ops);
4254 Ops[2] = Builder.CreateFNeg(Ops[2]);
4255 Value *Even = Builder.CreateCall(FMA, Ops);
4256
4257 if (IsSubAdd)
4258 std::swap(Even, Odd);
4259
4260 SmallVector<int, 32> Idxs(NumElts);
4261 for (int i = 0; i != NumElts; ++i)
4262 Idxs[i] = i + (i % 2) * NumElts;
4263
4264 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4265 }
4266
4267 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4268 : IsMask3 ? CI->getArgOperand(2)
4269 : CI->getArgOperand(0);
4270
4271 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
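// Worked example for the non-rounding path: with NumElts = 4 the
// interleave mask is <0, 5, 2, 7>, taking even lanes from Even (a*b - c)
// and odd lanes from Odd (a*b + c), which is the fmaddsub pattern;
// IsSubAdd swaps the two for fmsubadd.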
4272 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4273 Name.starts_with("avx512.maskz.pternlog.")) {
4274 bool ZeroMask = Name[11] == 'z';
4275 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4276 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4277 Intrinsic::ID IID;
4278 if (VecWidth == 128 && EltWidth == 32)
4279 IID = Intrinsic::x86_avx512_pternlog_d_128;
4280 else if (VecWidth == 256 && EltWidth == 32)
4281 IID = Intrinsic::x86_avx512_pternlog_d_256;
4282 else if (VecWidth == 512 && EltWidth == 32)
4283 IID = Intrinsic::x86_avx512_pternlog_d_512;
4284 else if (VecWidth == 128 && EltWidth == 64)
4285 IID = Intrinsic::x86_avx512_pternlog_q_128;
4286 else if (VecWidth == 256 && EltWidth == 64)
4287 IID = Intrinsic::x86_avx512_pternlog_q_256;
4288 else if (VecWidth == 512 && EltWidth == 64)
4289 IID = Intrinsic::x86_avx512_pternlog_q_512;
4290 else
4291 llvm_unreachable("Unexpected intrinsic");
4292
4293 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4294 CI->getArgOperand(2), CI->getArgOperand(3)};
4295 Rep = Builder.CreateIntrinsic(IID, Args);
4296 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4297 : CI->getArgOperand(0);
4298 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4299 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4300 Name.starts_with("avx512.maskz.vpmadd52")) {
4301 bool ZeroMask = Name[11] == 'z';
4302 bool High = Name[20] == 'h' || Name[21] == 'h';
4303 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4304 Intrinsic::ID IID;
4305 if (VecWidth == 128 && !High)
4306 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4307 else if (VecWidth == 256 && !High)
4308 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4309 else if (VecWidth == 512 && !High)
4310 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4311 else if (VecWidth == 128 && High)
4312 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4313 else if (VecWidth == 256 && High)
4314 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4315 else if (VecWidth == 512 && High)
4316 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4317 else
4318 llvm_unreachable("Unexpected intrinsic");
4319
4320 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4321 CI->getArgOperand(2)};
4322 Rep = Builder.CreateIntrinsic(IID, Args);
4323 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4324 : CI->getArgOperand(0);
4325 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4326 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4327 Name.starts_with("avx512.mask.vpermt2var.") ||
4328 Name.starts_with("avx512.maskz.vpermt2var.")) {
4329 bool ZeroMask = Name[11] == 'z';
4330 bool IndexForm = Name[17] == 'i';
4331 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4332 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4333 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4334 Name.starts_with("avx512.mask.vpdpbusds.") ||
4335 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4336 bool ZeroMask = Name[11] == 'z';
4337 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4338 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4339 Intrinsic::ID IID;
4340 if (VecWidth == 128 && !IsSaturating)
4341 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4342 else if (VecWidth == 256 && !IsSaturating)
4343 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4344 else if (VecWidth == 512 && !IsSaturating)
4345 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4346 else if (VecWidth == 128 && IsSaturating)
4347 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4348 else if (VecWidth == 256 && IsSaturating)
4349 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4350 else if (VecWidth == 512 && IsSaturating)
4351 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4352 else
4353 llvm_unreachable("Unexpected intrinsic");
4354
4355 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4356 CI->getArgOperand(2)};
4357
4358 // Input argument types were incorrectly set to vectors of i32 before, but
4359 // they should be vectors of i8. Insert a bitcast when encountering the old
4360 // types.
4361 if (Args[1]->getType()->isVectorTy() &&
4362 cast<VectorType>(Args[1]->getType())
4363 ->getElementType()
4364 ->isIntegerTy(32) &&
4365 Args[2]->getType()->isVectorTy() &&
4366 cast<VectorType>(Args[2]->getType())
4367 ->getElementType()
4368 ->isIntegerTy(32)) {
4369 Type *NewArgType = nullptr;
4370 if (VecWidth == 128)
4371 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4372 else if (VecWidth == 256)
4373 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4374 else if (VecWidth == 512)
4375 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4376 else
4377 llvm_unreachable("Unexpected vector bit width");
4378
4379 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4380 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4381 }
4382
4383 Rep = Builder.CreateIntrinsic(IID, Args);
4384 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4385 : CI->getArgOperand(0);
4386 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4387 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4388 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4389 Name.starts_with("avx512.mask.vpdpwssds.") ||
4390 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4391 bool ZeroMask = Name[11] == 'z';
4392 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4393 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4394 Intrinsic::ID IID;
4395 if (VecWidth == 128 && !IsSaturating)
4396 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4397 else if (VecWidth == 256 && !IsSaturating)
4398 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4399 else if (VecWidth == 512 && !IsSaturating)
4400 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4401 else if (VecWidth == 128 && IsSaturating)
4402 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4403 else if (VecWidth == 256 && IsSaturating)
4404 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4405 else if (VecWidth == 512 && IsSaturating)
4406 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4407 else
4408 llvm_unreachable("Unexpected intrinsic");
4409
4410 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4411 CI->getArgOperand(2)};
4412
4413 // Input argument types were incorrectly set to vectors of i32 before, but
4414 // they should be vectors of i16. Insert a bitcast when encountering the old
4415 // types.
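 // e.g. for the 128-bit variant the multiplicand operands of an old call
 // arrive as <4 x i32> and are bitcast to <8 x i16> here.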
4416 if (Args[1]->getType()->isVectorTy() &&
4417 cast<VectorType>(Args[1]->getType())
4418 ->getElementType()
4419 ->isIntegerTy(32) &&
4420 Args[2]->getType()->isVectorTy() &&
4421 cast<VectorType>(Args[2]->getType())
4422 ->getElementType()
4423 ->isIntegerTy(32)) {
4424 Type *NewArgType = nullptr;
4425 if (VecWidth == 128)
4426 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4427 else if (VecWidth == 256)
4428 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4429 else if (VecWidth == 512)
4430 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4431 else
4432 llvm_unreachable("Unexpected vector bit width");
4433
4434 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4435 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4436 }
4437
4438 Rep = Builder.CreateIntrinsic(IID, Args);
4439 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4440 : CI->getArgOperand(0);
4441 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4442 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4443 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4444 Name == "subborrow.u32" || Name == "subborrow.u64") {
4445 Intrinsic::ID IID;
4446 if (Name[0] == 'a' && Name.back() == '2')
4447 IID = Intrinsic::x86_addcarry_32;
4448 else if (Name[0] == 'a' && Name.back() == '4')
4449 IID = Intrinsic::x86_addcarry_64;
4450 else if (Name[0] == 's' && Name.back() == '2')
4451 IID = Intrinsic::x86_subborrow_32;
4452 else if (Name[0] == 's' && Name.back() == '4')
4453 IID = Intrinsic::x86_subborrow_64;
4454 else
4455 llvm_unreachable("Unexpected intrinsic");
4456
4457 // Make a call with 3 operands.
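 // The old intrinsics returned the carry flag and stored the arithmetic
 // result through their pointer operand; the new ones return both at once,
 // e.g. { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %a, i32 %b).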
4458 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4459 CI->getArgOperand(2)};
4460 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4461
4462 // Extract the second result and store it.
4463 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4464 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4465 // Replace the original call result with the first result of the new call.
4466 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4467
4468 CI->replaceAllUsesWith(CF);
4469 Rep = nullptr;
4470 } else if (Name.starts_with("avx512.mask.") &&
4471 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4472 // Rep will be updated by the call in the condition.
4473 }
4474
4475 return Rep;
4476}
4477
4478 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4479 Function *F, IRBuilder<> &Builder) {
4480 if (Name.starts_with("neon.bfcvt")) {
4481 if (Name.starts_with("neon.bfcvtn2")) {
4482 SmallVector<int, 32> LoMask(4);
4483 std::iota(LoMask.begin(), LoMask.end(), 0);
4484 SmallVector<int, 32> ConcatMask(8);
4485 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4486 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4487 Value *Trunc =
4488 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4489 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4490 } else if (Name.starts_with("neon.bfcvtn")) {
4491 SmallVector<int, 32> ConcatMask(8);
4492 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4493 Type *V4BF16 =
4494 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4495 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4497 return Builder.CreateShuffleVector(
4498 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4499 } else {
4500 return Builder.CreateFPTrunc(CI->getOperand(0),
4501 Type::getBFloatTy(F->getContext()));
4502 }
4503 } else if (Name.starts_with("sve.fcvt")) {
4504 Intrinsic::ID NewID =
4505 StringSwitch<Intrinsic::ID>(Name)
4506 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4507 .Case("sve.fcvtnt.bf16f32",
4508 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4509 .Default(Intrinsic::not_intrinsic);
4510 if (NewID == Intrinsic::not_intrinsic)
4511 llvm_unreachable("Unhandled Intrinsic!");
4512
4513 SmallVector<Value *, 3> Args(CI->args());
4514
4515 // The original intrinsics incorrectly used a predicate based on the
4516 // smallest element type rather than the largest.
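 // e.g. a <vscale x 8 x i1> predicate (based on bf16) is rewritten below to
 // <vscale x 4 x i1> (based on f32) via an svbool round-trip.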
4517 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4518 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4519
4520 if (Args[1]->getType() != BadPredTy)
4521 llvm_unreachable("Unexpected predicate type!");
4522
4523 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4524 BadPredTy, Args[1]);
4525 Args[1] = Builder.CreateIntrinsic(
4526 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4527
4528 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4529 CI->getName());
4530 }
4531
4532 llvm_unreachable("Unhandled Intrinsic!");
4533}
4534
4535 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4536 IRBuilder<> &Builder) {
4537 if (Name == "mve.vctp64.old") {
4538 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4539 // correct type.
4540 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4541 CI->getArgOperand(0),
4542 /*FMFSource=*/nullptr, CI->getName());
4543 Value *C1 = Builder.CreateIntrinsic(
4544 Intrinsic::arm_mve_pred_v2i,
4545 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4546 return Builder.CreateIntrinsic(
4547 Intrinsic::arm_mve_pred_i2v,
4548 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4549 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4550 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4551 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4552 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4553 Name ==
4554 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4555 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4556 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4557 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4558 Name ==
4559 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4560 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4561 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4562 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4563 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4564 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4565 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4566 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4567 std::vector<Type *> Tys;
4568 unsigned ID = CI->getIntrinsicID();
4569 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4570 switch (ID) {
4571 case Intrinsic::arm_mve_mull_int_predicated:
4572 case Intrinsic::arm_mve_vqdmull_predicated:
4573 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4574 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4575 break;
4576 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4577 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4578 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4579 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4580 V2I1Ty};
4581 break;
4582 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4583 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4584 CI->getOperand(1)->getType(), V2I1Ty};
4585 break;
4586 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4587 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4588 CI->getOperand(2)->getType(), V2I1Ty};
4589 break;
4590 case Intrinsic::arm_cde_vcx1q_predicated:
4591 case Intrinsic::arm_cde_vcx1qa_predicated:
4592 case Intrinsic::arm_cde_vcx2q_predicated:
4593 case Intrinsic::arm_cde_vcx2qa_predicated:
4594 case Intrinsic::arm_cde_vcx3q_predicated:
4595 case Intrinsic::arm_cde_vcx3qa_predicated:
4596 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4597 break;
4598 default:
4599 llvm_unreachable("Unhandled Intrinsic!");
4600 }
4601
4602 std::vector<Value *> Ops;
4603 for (Value *Op : CI->args()) {
4604 Type *Ty = Op->getType();
4605 if (Ty->getScalarSizeInBits() == 1) {
4606 Value *C1 = Builder.CreateIntrinsic(
4607 Intrinsic::arm_mve_pred_v2i,
4608 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4609 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4610 }
4611 Ops.push_back(Op);
4612 }
4613
4614 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4615 CI->getName());
4616 }
4617 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4618}
4619
4620// These are expected to have the arguments:
4621// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4622//
4623// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
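// e.g. a call such as
//   @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %p, float %v, i32 0, i32 0, i1 false)
// is rewritten here to an equivalent atomicrmw fadd instruction.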
4624//
4625 static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4626 Function *F, IRBuilder<> &Builder) {
4627 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4628 // for compatibility.
4629 auto UpgradeLegacyWMMAIUIntrinsicCall =
4630 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4631 ArrayRef<Type *> OverloadTys) -> Value * {
4632 // Prepare arguments, append clamp=0 for compatibility
4633 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4634 Args.push_back(Builder.getFalse());
4635
4636 // Insert the declaration for the right overload types
4637 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
4638 F->getParent(), F->getIntrinsicID(), OverloadTys);
4639
4640 // Copy operand bundles if any
4641 SmallVector<OperandBundleDef, 1> Bundles;
4642 CI->getOperandBundlesAsDefs(Bundles);
4643
4644 // Create the new call and copy calling properties
4645 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4646 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4647 NewCall->setCallingConv(CI->getCallingConv());
4648 NewCall->setAttributes(CI->getAttributes());
4649 NewCall->setDebugLoc(CI->getDebugLoc());
4650 NewCall->copyMetadata(*CI);
4651 return NewCall;
4652 };
4653
4654 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4655 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4656 "intrinsic should have 7 arguments");
4657 Type *T1 = CI->getArgOperand(4)->getType();
4658 Type *T2 = CI->getArgOperand(1)->getType();
4659 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4660 }
4661 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4662 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4663 "intrinsic should have 8 arguments");
4664 Type *T1 = CI->getArgOperand(4)->getType();
4665 Type *T2 = CI->getArgOperand(1)->getType();
4666 Type *T3 = CI->getArgOperand(3)->getType();
4667 Type *T4 = CI->getArgOperand(5)->getType();
4668 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4669 }
4670
4671 AtomicRMWInst::BinOp RMWOp =
4672 StringSwitch<AtomicRMWInst::BinOp>(Name)
4673 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4674 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4675 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4676 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4677 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4678 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4679 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4680 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4681 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4682 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4683 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4684 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4685 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4686
4687 unsigned NumOperands = CI->getNumOperands();
4688 if (NumOperands < 3) // Malformed bitcode.
4689 return nullptr;
4690
4691 Value *Ptr = CI->getArgOperand(0);
4692 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4693 if (!PtrTy) // Malformed.
4694 return nullptr;
4695
4696 Value *Val = CI->getArgOperand(1);
4697 if (Val->getType() != CI->getType()) // Malformed.
4698 return nullptr;
4699
4700 ConstantInt *OrderArg = nullptr;
4701 bool IsVolatile = false;
4702
4703 // These should have 5 arguments (plus the callee). A separate version of the
4704 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4705 if (NumOperands > 3)
4706 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4707
4708 // Ignore scope argument at 3
4709
4710 if (NumOperands > 5) {
4711 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4712 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4713 }
4714
4715 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4716 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4717 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4718 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4719 Order = AtomicOrdering::SequentiallyConsistent;
4720
4721 LLVMContext &Ctx = F->getContext();
4722
4723 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4724 Type *RetTy = CI->getType();
4725 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4726 if (VT->getElementType()->isIntegerTy(16)) {
4727 VectorType *AsBF16 =
4728 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4729 Val = Builder.CreateBitCast(Val, AsBF16);
4730 }
4731 }
4732
4733 // The scope argument never really worked correctly. Use agent as the most
4734 // conservative option which should still always produce the instruction.
4735 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4736 AtomicRMWInst *RMW =
4737 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4738
4739 unsigned AddrSpace = PtrTy->getAddressSpace();
4740 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4741 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4742 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4743 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4744 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4745 }
4746
4747 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4748 MDBuilder MDB(F->getContext());
4749 MDNode *RangeNotPrivate =
4750 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4751 APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4752 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4753 }
4754
4755 if (IsVolatile)
4756 RMW->setVolatile(true);
4757
4758 return Builder.CreateBitCast(RMW, RetTy);
4759}
4760
4761/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4762/// plain MDNode, as it's the verifier's job to check these are the correct
4763/// types later.
4764static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4765 if (Op < CI->arg_size()) {
4766 if (MetadataAsValue *MAV =
4767 dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) {
4768 Metadata *MD = MAV->getMetadata();
4769 return dyn_cast_if_present<MDNode>(MD);
4770 }
4771 }
4772 return nullptr;
4773}
4774
4775/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4776static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4777 if (Op < CI->arg_size())
4778 if (auto *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4779 return MAV->getMetadata();
4780 return nullptr;
4781}
4782
4783 static MDNode *getDebugLocSafe(const Instruction *I) {
4784 // The MDNode attached to this instruction might not be the correct type,
4785 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4786 return I->getDebugLoc().getAsMDNode();
4787}
4788
4789/// Convert debug intrinsic calls to non-instruction debug records.
4790/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4791/// \p CI - The debug intrinsic call.
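/// e.g. call void @llvm.dbg.value(metadata i32 %x, metadata !10,
/// metadata !DIExpression()) becomes a #dbg_value record attached to the
/// enclosing basic block instead of remaining an instruction.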
4792 static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4793 DbgRecord *DR = nullptr;
4794 if (Name == "label") {
4795 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
4796 CI->getDebugLoc());
4797 } else if (Name == "assign") {
4798 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4799 DbgVariableRecord::LocationType::Assign, unwrapMAVMetadataOp(CI, 0),
4800 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4801 unwrapMAVMetadataOp(CI, 4),
4802 /*The address is a Value ref, it will be stored as a Metadata */
4803 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4804 } else if (Name == "declare") {
4805 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4806 DbgVariableRecord::LocationType::Declare, unwrapMAVMetadataOp(CI, 0),
4807 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4808 getDebugLocSafe(CI));
4809 } else if (Name == "addr") {
4810 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4811 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4812 // Don't try to add something to the expression if it's not an expression.
4813 // Instead, allow the verifier to fail later.
4814 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4815 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4816 }
4817 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4818 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4819 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4820 getDebugLocSafe(CI));
4821 } else if (Name == "value") {
4822 // An old version of dbg.value had an extra offset argument.
4823 unsigned VarOp = 1;
4824 unsigned ExprOp = 2;
4825 if (CI->arg_size() == 4) {
4826 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4827 // Nonzero offset dbg.values get dropped without a replacement.
4828 if (!Offset || !Offset->isZeroValue())
4829 return;
4830 VarOp = 2;
4831 ExprOp = 3;
4832 }
4833 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4834 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4835 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4836 nullptr, getDebugLocSafe(CI));
4837 }
4838 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4839 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4840}
4841
4842 static Value *upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder) {
4843 auto *Offset = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4844 if (!Offset)
4845 reportFatalUsageError("Invalid llvm.vector.splice offset argument");
4846 int64_t OffsetVal = Offset->getSExtValue();
4847 return Builder.CreateIntrinsic(OffsetVal >= 0
4848 ? Intrinsic::vector_splice_left
4849 : Intrinsic::vector_splice_right,
4850 CI->getType(),
4851 {CI->getArgOperand(0), CI->getArgOperand(1),
4852 Builder.getInt32(std::abs(OffsetVal))});
4853}
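// e.g. llvm.vector.splice(%a, %b, i32 -3) becomes
// llvm.vector.splice.right(%a, %b, i32 3), while a non-negative offset maps
// to llvm.vector.splice.left with the same magnitude.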
4854
4855/// Upgrade a call to an old intrinsic. All argument and return casting must be
4856/// provided to seamlessly integrate with existing context.
4857 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4858 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4859 // checks the callee's function type matches. It's likely we need to handle
4860 // type changes here.
4861 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4862 if (!F)
4863 return;
4864
4865 LLVMContext &C = CI->getContext();
4866 IRBuilder<> Builder(C);
4867 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4868
4869 if (!NewFn) {
4870 // Get the Function's name.
4871 StringRef Name = F->getName();
4872
4873 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4874 Name = Name.substr(5);
4875
4876 bool IsX86 = Name.consume_front("x86.");
4877 bool IsNVVM = Name.consume_front("nvvm.");
4878 bool IsAArch64 = Name.consume_front("aarch64.");
4879 bool IsARM = Name.consume_front("arm.");
4880 bool IsAMDGCN = Name.consume_front("amdgcn.");
4881 bool IsDbg = Name.consume_front("dbg.");
4882 bool IsOldSplice =
4883 (Name.consume_front("experimental.vector.splice") ||
4884 Name.consume_front("vector.splice")) &&
4885 !(Name.starts_with(".left") || Name.starts_with(".right"));
4886 Value *Rep = nullptr;
4887
4888 if (!IsX86 && Name == "stackprotectorcheck") {
4889 Rep = nullptr;
4890 } else if (IsNVVM) {
4891 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4892 } else if (IsX86) {
4893 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4894 } else if (IsAArch64) {
4895 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4896 } else if (IsARM) {
4897 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4898 } else if (IsAMDGCN) {
4899 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4900 } else if (IsDbg) {
4901 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4902 } else if (IsOldSplice) {
4903 Rep = upgradeVectorSplice(CI, Builder);
4904 } else {
4905 llvm_unreachable("Unknown function for CallBase upgrade.");
4906 }
4907
4908 if (Rep)
4909 CI->replaceAllUsesWith(Rep);
4910 CI->eraseFromParent();
4911 return;
4912 }
4913
4914 const auto &DefaultCase = [&]() -> void {
4915 if (F == NewFn)
4916 return;
4917
4918 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4919 // Handle generic mangling change.
4920 assert(
4921 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4922 "Unknown function for CallBase upgrade and isn't just a name change");
4923 CI->setCalledFunction(NewFn);
4924 return;
4925 }
4926
4927 // This must be an upgrade from a named to a literal struct.
4928 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4929 assert(OldST != NewFn->getReturnType() &&
4930 "Return type must have changed");
4931 assert(OldST->getNumElements() ==
4932 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4933 "Must have same number of elements");
4934
4935 SmallVector<Value *> Args(CI->args());
4936 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4937 NewCI->setAttributes(CI->getAttributes());
4938 Value *Res = PoisonValue::get(OldST);
4939 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4940 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4941 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4942 }
4943 CI->replaceAllUsesWith(Res);
4944 CI->eraseFromParent();
4945 return;
4946 }
4947
4948 // We're probably about to produce something invalid. Let the verifier catch
4949 // it instead of dying here.
4950 CI->setCalledOperand(
4951 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4952 return;
4953 };
4954 CallInst *NewCall = nullptr;
4955 switch (NewFn->getIntrinsicID()) {
4956 default: {
4957 DefaultCase();
4958 return;
4959 }
4960 case Intrinsic::arm_neon_vst1:
4961 case Intrinsic::arm_neon_vst2:
4962 case Intrinsic::arm_neon_vst3:
4963 case Intrinsic::arm_neon_vst4:
4964 case Intrinsic::arm_neon_vst2lane:
4965 case Intrinsic::arm_neon_vst3lane:
4966 case Intrinsic::arm_neon_vst4lane: {
4967 SmallVector<Value *, 4> Args(CI->args());
4968 NewCall = Builder.CreateCall(NewFn, Args);
4969 break;
4970 }
4971 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4972 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4973 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4974 LLVMContext &Ctx = F->getParent()->getContext();
4975 SmallVector<Value *, 4> Args(CI->args());
4976 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4977 cast<ConstantInt>(Args[3])->getZExtValue());
4978 NewCall = Builder.CreateCall(NewFn, Args);
4979 break;
4980 }
4981 case Intrinsic::aarch64_sve_ld3_sret:
4982 case Intrinsic::aarch64_sve_ld4_sret:
4983 case Intrinsic::aarch64_sve_ld2_sret: {
4984 StringRef Name = F->getName();
4985 Name = Name.substr(5);
4986 unsigned N = StringSwitch<unsigned>(Name)
4987 .StartsWith("aarch64.sve.ld2", 2)
4988 .StartsWith("aarch64.sve.ld3", 3)
4989 .StartsWith("aarch64.sve.ld4", 4)
4990 .Default(0);
4991 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4992 unsigned MinElts = RetTy->getMinNumElements() / N;
4993 SmallVector<Value *, 2> Args(CI->args());
4994 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4995 Value *Ret = llvm::PoisonValue::get(RetTy);
4996 for (unsigned I = 0; I < N; I++) {
4997 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4998 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
4999 }
5000 NewCall = dyn_cast<CallInst>(Ret);
5001 break;
5002 }
5003
5004 case Intrinsic::coro_end: {
5005 SmallVector<Value *, 3> Args(CI->args());
5006 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5007 NewCall = Builder.CreateCall(NewFn, Args);
5008 break;
5009 }
5010
5011 case Intrinsic::vector_extract: {
5012 StringRef Name = F->getName();
5013 Name = Name.substr(5); // Strip llvm
5014 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5015 DefaultCase();
5016 return;
5017 }
5018 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5019 unsigned MinElts = RetTy->getMinNumElements();
5020 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5021 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5022 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5023 break;
5024 }
5025
5026 case Intrinsic::vector_insert: {
5027 StringRef Name = F->getName();
5028 Name = Name.substr(5);
5029 if (!Name.starts_with("aarch64.sve.tuple")) {
5030 DefaultCase();
5031 return;
5032 }
5033 if (Name.starts_with("aarch64.sve.tuple.set")) {
5034 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5035 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5036 Value *NewIdx =
5037 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5038 NewCall = Builder.CreateCall(
5039 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5040 break;
5041 }
5042 if (Name.starts_with("aarch64.sve.tuple.create")) {
5043 unsigned N = StringSwitch<unsigned>(Name)
5044 .StartsWith("aarch64.sve.tuple.create2", 2)
5045 .StartsWith("aarch64.sve.tuple.create3", 3)
5046 .StartsWith("aarch64.sve.tuple.create4", 4)
5047 .Default(0);
5048 assert(N > 1 && "Create is expected to be between 2-4");
5049 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5050 Value *Ret = llvm::PoisonValue::get(RetTy);
5051 unsigned MinElts = RetTy->getMinNumElements() / N;
5052 for (unsigned I = 0; I < N; I++) {
5053 Value *V = CI->getArgOperand(I);
5054 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5055 }
5056 NewCall = dyn_cast<CallInst>(Ret);
5057 }
5058 break;
5059 }
5060
5061 case Intrinsic::arm_neon_bfdot:
5062 case Intrinsic::arm_neon_bfmmla:
5063 case Intrinsic::arm_neon_bfmlalb:
5064 case Intrinsic::arm_neon_bfmlalt:
5065 case Intrinsic::aarch64_neon_bfdot:
5066 case Intrinsic::aarch64_neon_bfmmla:
5067 case Intrinsic::aarch64_neon_bfmlalb:
5068 case Intrinsic::aarch64_neon_bfmlalt: {
5069 SmallVector<Value *, 3> Args;
5070 assert(CI->arg_size() == 3 &&
5071 "Mismatch between function args and call args");
5072 size_t OperandWidth =
5073 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
5074 assert((OperandWidth == 64 || OperandWidth == 128) &&
5075 "Unexpected operand width");
5076 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5077 auto Iter = CI->args().begin();
5078 Args.push_back(*Iter++);
5079 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5080 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5081 NewCall = Builder.CreateCall(NewFn, Args);
5082 break;
5083 }
5084
5085 case Intrinsic::bitreverse:
5086 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5087 break;
5088
5089 case Intrinsic::ctlz:
5090 case Intrinsic::cttz: {
5091 if (CI->arg_size() != 1) {
5092 DefaultCase();
5093 return;
5094 }
5095
5096 NewCall =
5097 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5098 break;
5099 }
5100
5101 case Intrinsic::objectsize: {
5102 Value *NullIsUnknownSize =
5103 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5104 Value *Dynamic =
5105 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5106 NewCall = Builder.CreateCall(
5107 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5108 break;
5109 }
5110
5111 case Intrinsic::ctpop:
5112 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5113 break;
5114
5115 case Intrinsic::convert_from_fp16:
5116 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5117 break;
5118
5119 case Intrinsic::dbg_value: {
5120 StringRef Name = F->getName();
5121 Name = Name.substr(5); // Strip llvm.
5122 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5123 if (Name.starts_with("dbg.addr")) {
5124 DIExpression *Expr = cast<DIExpression>(
5125 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5126 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5127 NewCall =
5128 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5129 MetadataAsValue::get(C, Expr)});
5130 break;
5131 }
5132
5133 // Upgrade from the old version that had an extra offset argument.
5134 assert(CI->arg_size() == 4);
5135 // Drop nonzero offsets instead of attempting to upgrade them.
5136 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
5137 if (Offset->isZeroValue()) {
5138 NewCall = Builder.CreateCall(
5139 NewFn,
5140 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5141 break;
5142 }
5143 CI->eraseFromParent();
5144 return;
5145 }
5146
5147 case Intrinsic::ptr_annotation:
5148 // Upgrade from versions that lacked the annotation attribute argument.
5149 if (CI->arg_size() != 4) {
5150 DefaultCase();
5151 return;
5152 }
5153
5154 // Create a new call with an added null annotation attribute argument.
5155 NewCall = Builder.CreateCall(
5156 NewFn,
5157 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5158 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5159 NewCall->takeName(CI);
5160 CI->replaceAllUsesWith(NewCall);
5161 CI->eraseFromParent();
5162 return;
5163
5164 case Intrinsic::var_annotation:
5165 // Upgrade from versions that lacked the annotation attribute argument.
5166 if (CI->arg_size() != 4) {
5167 DefaultCase();
5168 return;
5169 }
5170 // Create a new call with an added null annotation attribute argument.
5171 NewCall = Builder.CreateCall(
5172 NewFn,
5173 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5174 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5175 NewCall->takeName(CI);
5176 CI->replaceAllUsesWith(NewCall);
5177 CI->eraseFromParent();
5178 return;
5179
5180 case Intrinsic::riscv_aes32dsi:
5181 case Intrinsic::riscv_aes32dsmi:
5182 case Intrinsic::riscv_aes32esi:
5183 case Intrinsic::riscv_aes32esmi:
5184 case Intrinsic::riscv_sm4ks:
5185 case Intrinsic::riscv_sm4ed: {
5186 // The last argument to these intrinsics used to be i8 and changed to i32.
5187 // The type overload for sm4ks and sm4ed was removed.
5188 Value *Arg2 = CI->getArgOperand(2);
5189 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5190 return;
5191
5192 Value *Arg0 = CI->getArgOperand(0);
5193 Value *Arg1 = CI->getArgOperand(1);
5194 if (CI->getType()->isIntegerTy(64)) {
5195 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5196 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5197 }
5198
5199 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5200 cast<ConstantInt>(Arg2)->getZExtValue());
5201
5202 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5203 Value *Res = NewCall;
5204 if (Res->getType() != CI->getType())
5205 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5206 NewCall->takeName(CI);
5207 CI->replaceAllUsesWith(Res);
5208 CI->eraseFromParent();
5209 return;
5210 }
5211 case Intrinsic::nvvm_mapa_shared_cluster: {
5212 // Create a new call with the correct address space.
5213 NewCall =
5214 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5215 Value *Res = NewCall;
5216 Res = Builder.CreateAddrSpaceCast(
5217 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5218 NewCall->takeName(CI);
5219 CI->replaceAllUsesWith(Res);
5220 CI->eraseFromParent();
5221 return;
5222 }
5223 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5224 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5225 // Create a new call with the correct address space.
5226 SmallVector<Value *, 4> Args(CI->args());
5227 Args[0] = Builder.CreateAddrSpaceCast(
5228 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5229
5230 NewCall = Builder.CreateCall(NewFn, Args);
5231 NewCall->takeName(CI);
5232 CI->replaceAllUsesWith(NewCall);
5233 CI->eraseFromParent();
5234 return;
5235 }
5236 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5237 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5238 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5239 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5240 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5241 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5242 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5243 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5244 SmallVector<Value *, 16> Args(CI->args());
5245
5246 // Create AddrSpaceCast to shared_cluster if needed.
5247 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5248 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5249 if (AS == NVPTXAS::ADDRESS_SPACE_SHARED)
5250 Args[0] = Builder.CreateAddrSpaceCast(
5251 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5252
5253 // Attach the flag argument for cta_group, with a
5254 // default value of 0. This handles case (2) in
5255 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5256 size_t NumArgs = CI->arg_size();
5257 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5258 if (!FlagArg->getType()->isIntegerTy(1))
5259 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5260
5261 NewCall = Builder.CreateCall(NewFn, Args);
5262 NewCall->takeName(CI);
5263 CI->replaceAllUsesWith(NewCall);
5264 CI->eraseFromParent();
5265 return;
5266 }
5267 case Intrinsic::riscv_sha256sig0:
5268 case Intrinsic::riscv_sha256sig1:
5269 case Intrinsic::riscv_sha256sum0:
5270 case Intrinsic::riscv_sha256sum1:
5271 case Intrinsic::riscv_sm3p0:
5272 case Intrinsic::riscv_sm3p1: {
5273 // These intrinsics used to be overloaded on i32/i64; the overload was
5274 // removed and they now only operate on i32, so only i64 calls need upgrading.
5275 if (!CI->getType()->isIntegerTy(64))
5276 return;
5277
5278 Value *Arg =
5279 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5280
5281 NewCall = Builder.CreateCall(NewFn, Arg);
5282 Value *Res =
5283 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5284 NewCall->takeName(CI);
5285 CI->replaceAllUsesWith(Res);
5286 CI->eraseFromParent();
5287 return;
5288 }
5289
5290 case Intrinsic::x86_xop_vfrcz_ss:
5291 case Intrinsic::x86_xop_vfrcz_sd:
5292 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5293 break;
5294
5295 case Intrinsic::x86_xop_vpermil2pd:
5296 case Intrinsic::x86_xop_vpermil2ps:
5297 case Intrinsic::x86_xop_vpermil2pd_256:
5298 case Intrinsic::x86_xop_vpermil2ps_256: {
5299 SmallVector<Value *, 4> Args(CI->args());
5300 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5301 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5302 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5303 NewCall = Builder.CreateCall(NewFn, Args);
5304 break;
5305 }
5306
5307 case Intrinsic::x86_sse41_ptestc:
5308 case Intrinsic::x86_sse41_ptestz:
5309 case Intrinsic::x86_sse41_ptestnzc: {
5310 // The arguments for these intrinsics used to be v4f32, and changed
5311 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5312 // So, the only thing required is a bitcast for both arguments.
5313 // First, check the arguments have the old type.
5314 Value *Arg0 = CI->getArgOperand(0);
5315 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5316 return;
5317
5318 // Old intrinsic, add bitcasts
5319 Value *Arg1 = CI->getArgOperand(1);
5320
5321 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5322
5323 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5324 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5325
5326 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5327 break;
5328 }
5329
5330 case Intrinsic::x86_rdtscp: {
5331 // This used to take 1 argument. If we have no arguments, it is already
5332 // upgraded.
5333 if (CI->getNumOperands() == 0)
5334 return;
5335
5336 NewCall = Builder.CreateCall(NewFn);
5337 // Extract the second result and store it.
5338 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5339 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5340 // Replace the original call result with the first result of the new call.
5341 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5342
5343 NewCall->takeName(CI);
5344 CI->replaceAllUsesWith(TSC);
5345 CI->eraseFromParent();
5346 return;
5347 }
5348
5349 case Intrinsic::x86_sse41_insertps:
5350 case Intrinsic::x86_sse41_dppd:
5351 case Intrinsic::x86_sse41_dpps:
5352 case Intrinsic::x86_sse41_mpsadbw:
5353 case Intrinsic::x86_avx_dp_ps_256:
5354 case Intrinsic::x86_avx2_mpsadbw: {
5355 // Need to truncate the last argument from i32 to i8 -- this argument models
5356 // an inherently 8-bit immediate operand to these x86 instructions.
5357 SmallVector<Value *, 4> Args(CI->args());
5358
5359 // Replace the last argument with a trunc.
5360 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5361 NewCall = Builder.CreateCall(NewFn, Args);
5362 break;
5363 }
5364
5365 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5366 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5367 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5368 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5369 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5370 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5371 SmallVector<Value *, 4> Args(CI->args());
5372 unsigned NumElts =
5373 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5374 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5375
5376 NewCall = Builder.CreateCall(NewFn, Args);
5377 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5378
5379 NewCall->takeName(CI);
5380 CI->replaceAllUsesWith(Res);
5381 CI->eraseFromParent();
5382 return;
5383 }
5384
5385 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5386 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5387 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5388 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5389 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5390 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5391 SmallVector<Value *, 4> Args(CI->args());
5392 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5393 if (NewFn->getIntrinsicID() ==
5394 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5395 Args[1] = Builder.CreateBitCast(
5396 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5397
5398 NewCall = Builder.CreateCall(NewFn, Args);
5399 Value *Res = Builder.CreateBitCast(
5400 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5401
5402 NewCall->takeName(CI);
5403 CI->replaceAllUsesWith(Res);
5404 CI->eraseFromParent();
5405 return;
5406 }
5407 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5408 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5409 case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
5410 SmallVector<Value *, 4> Args(CI->args());
5411 unsigned NumElts =
5412 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5413 Args[1] = Builder.CreateBitCast(
5414 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5415 Args[2] = Builder.CreateBitCast(
5416 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5417
5418 NewCall = Builder.CreateCall(NewFn, Args);
5419 break;
5420 }
5421
5422 case Intrinsic::thread_pointer: {
5423 NewCall = Builder.CreateCall(NewFn, {});
5424 break;
5425 }
5426
5427 case Intrinsic::memcpy:
5428 case Intrinsic::memmove:
5429 case Intrinsic::memset: {
5430 // We have to make sure that the call signature is what we're expecting.
5431 // We only want to change the old signatures by removing the alignment arg:
5432 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5433 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5434 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5435 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5436 // Note: i8*'s in the above can be any pointer type
5437 if (CI->arg_size() != 5) {
5438 DefaultCase();
5439 return;
5440 }
5441 // Remove alignment argument (3), and add alignment attributes to the
5442 // dest/src pointers.
5443 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5444 CI->getArgOperand(2), CI->getArgOperand(4)};
5445 NewCall = Builder.CreateCall(NewFn, Args);
5446 AttributeList OldAttrs = CI->getAttributes();
5447 AttributeList NewAttrs = AttributeList::get(
5448 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5449 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5450 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5451 NewCall->setAttributes(NewAttrs);
5452 auto *MemCI = cast<MemIntrinsic>(NewCall);
5453 // All mem intrinsics support dest alignment.
5454 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
5455 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5456 // Memcpy/Memmove also support source alignment.
5457 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5458 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5459 break;
5460 }
5461
5462 case Intrinsic::masked_load:
5463 case Intrinsic::masked_gather:
5464 case Intrinsic::masked_store:
5465 case Intrinsic::masked_scatter: {
5466 if (CI->arg_size() != 4) {
5467 DefaultCase();
5468 return;
5469 }
5470
5471 auto GetMaybeAlign = [](Value *Op) {
5472 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5473 uint64_t Val = CI->getZExtValue();
5474 if (Val == 0)
5475 return MaybeAlign();
5476 if (isPowerOf2_64(Val))
5477 return MaybeAlign(Val);
5478 }
5479 reportFatalUsageError("Invalid alignment argument");
5480 };
5481 auto GetAlign = [&](Value *Op) {
5482 MaybeAlign Align = GetMaybeAlign(Op);
5483 if (Align)
5484 return *Align;
5485 reportFatalUsageError("Invalid zero alignment argument");
5486 };
5487
5488 const DataLayout &DL = CI->getDataLayout();
5489 switch (NewFn->getIntrinsicID()) {
5490 case Intrinsic::masked_load:
5491 NewCall = Builder.CreateMaskedLoad(
5492 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5493 CI->getArgOperand(2), CI->getArgOperand(3));
5494 break;
5495 case Intrinsic::masked_gather:
5496 NewCall = Builder.CreateMaskedGather(
5497 CI->getType(), CI->getArgOperand(0),
5498 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5499 CI->getType()->getScalarType()),
5500 CI->getArgOperand(2), CI->getArgOperand(3));
5501 break;
5502 case Intrinsic::masked_store:
5503 NewCall = Builder.CreateMaskedStore(
5504 CI->getArgOperand(0), CI->getArgOperand(1),
5505 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5506 break;
5507 case Intrinsic::masked_scatter:
5508 NewCall = Builder.CreateMaskedScatter(
5509 CI->getArgOperand(0), CI->getArgOperand(1),
5510 DL.getValueOrABITypeAlignment(
5511 GetMaybeAlign(CI->getArgOperand(2)),
5512 CI->getArgOperand(0)->getType()->getScalarType()),
5513 CI->getArgOperand(3));
5514 break;
5515 default:
5516 llvm_unreachable("Unexpected intrinsic ID");
5517 }
5518 // Previous metadata is still valid.
5519 NewCall->copyMetadata(*CI);
5520 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5521 break;
5522 }
5523
5524 case Intrinsic::lifetime_start:
5525 case Intrinsic::lifetime_end: {
5526 if (CI->arg_size() != 2) {
5527 DefaultCase();
5528 return;
5529 }
5530
5531 Value *Ptr = CI->getArgOperand(1);
5532 // Try to strip pointer casts, such that the lifetime works on an alloca.
5533 Ptr = Ptr->stripPointerCasts();
5534 if (isa<AllocaInst>(Ptr)) {
5535 // Don't use NewFn, as we might have looked through an addrspacecast.
5536 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5537 NewCall = Builder.CreateLifetimeStart(Ptr);
5538 else
5539 NewCall = Builder.CreateLifetimeEnd(Ptr);
5540 break;
5541 }
5542
5543 // Otherwise remove the lifetime marker.
5544 CI->eraseFromParent();
5545 return;
5546 }
5547
5548 case Intrinsic::x86_avx512_vpdpbusd_128:
5549 case Intrinsic::x86_avx512_vpdpbusd_256:
5550 case Intrinsic::x86_avx512_vpdpbusd_512:
5551 case Intrinsic::x86_avx512_vpdpbusds_128:
5552 case Intrinsic::x86_avx512_vpdpbusds_256:
5553 case Intrinsic::x86_avx512_vpdpbusds_512:
5554 case Intrinsic::x86_avx2_vpdpbssd_128:
5555 case Intrinsic::x86_avx2_vpdpbssd_256:
5556 case Intrinsic::x86_avx10_vpdpbssd_512:
5557 case Intrinsic::x86_avx2_vpdpbssds_128:
5558 case Intrinsic::x86_avx2_vpdpbssds_256:
5559 case Intrinsic::x86_avx10_vpdpbssds_512:
5560 case Intrinsic::x86_avx2_vpdpbsud_128:
5561 case Intrinsic::x86_avx2_vpdpbsud_256:
5562 case Intrinsic::x86_avx10_vpdpbsud_512:
5563 case Intrinsic::x86_avx2_vpdpbsuds_128:
5564 case Intrinsic::x86_avx2_vpdpbsuds_256:
5565 case Intrinsic::x86_avx10_vpdpbsuds_512:
5566 case Intrinsic::x86_avx2_vpdpbuud_128:
5567 case Intrinsic::x86_avx2_vpdpbuud_256:
5568 case Intrinsic::x86_avx10_vpdpbuud_512:
5569 case Intrinsic::x86_avx2_vpdpbuuds_128:
5570 case Intrinsic::x86_avx2_vpdpbuuds_256:
5571 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5572 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5573 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5574 CI->getArgOperand(2)};
5575 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5576 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5577 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5578
5579 NewCall = Builder.CreateCall(NewFn, Args);
5580 break;
5581 }
5582 case Intrinsic::x86_avx512_vpdpwssd_128:
5583 case Intrinsic::x86_avx512_vpdpwssd_256:
5584 case Intrinsic::x86_avx512_vpdpwssd_512:
5585 case Intrinsic::x86_avx512_vpdpwssds_128:
5586 case Intrinsic::x86_avx512_vpdpwssds_256:
5587 case Intrinsic::x86_avx512_vpdpwssds_512:
5588 case Intrinsic::x86_avx2_vpdpwsud_128:
5589 case Intrinsic::x86_avx2_vpdpwsud_256:
5590 case Intrinsic::x86_avx10_vpdpwsud_512:
5591 case Intrinsic::x86_avx2_vpdpwsuds_128:
5592 case Intrinsic::x86_avx2_vpdpwsuds_256:
5593 case Intrinsic::x86_avx10_vpdpwsuds_512:
5594 case Intrinsic::x86_avx2_vpdpwusd_128:
5595 case Intrinsic::x86_avx2_vpdpwusd_256:
5596 case Intrinsic::x86_avx10_vpdpwusd_512:
5597 case Intrinsic::x86_avx2_vpdpwusds_128:
5598 case Intrinsic::x86_avx2_vpdpwusds_256:
5599 case Intrinsic::x86_avx10_vpdpwusds_512:
5600 case Intrinsic::x86_avx2_vpdpwuud_128:
5601 case Intrinsic::x86_avx2_vpdpwuud_256:
5602 case Intrinsic::x86_avx10_vpdpwuud_512:
5603 case Intrinsic::x86_avx2_vpdpwuuds_128:
5604 case Intrinsic::x86_avx2_vpdpwuuds_256:
5605 case Intrinsic::x86_avx10_vpdpwuuds_512: {
5606 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5607 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5608 CI->getArgOperand(2)};
5609 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5610 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5611 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5612
5613 NewCall = Builder.CreateCall(NewFn, Args);
5614 break;
5615 }
5616 assert(NewCall && "Should have either set this variable or returned through "
5617 "the default case");
5618 NewCall->takeName(CI);
5619 CI->replaceAllUsesWith(NewCall);
5620 CI->eraseFromParent();
5621}
5622
5623 void llvm::UpgradeCallsToIntrinsic(Function *F) {
5624 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5625
5626 // Check if this function should be upgraded and get the replacement function
5627 // if there is one.
5628 Function *NewFn;
5629 if (UpgradeIntrinsicFunction(F, NewFn)) {
5630 // Replace all users of the old function with the new function or new
5631 // instructions. This is not a range loop because the call is deleted.
5632 for (User *U : make_early_inc_range(F->users()))
5633 if (CallBase *CB = dyn_cast<CallBase>(U))
5634 UpgradeIntrinsicCall(CB, NewFn);
5635
5636 // Remove old function, no longer used, from the module.
5637 if (F != NewFn)
5638 F->eraseFromParent();
5639 }
5640}
5641
5642 MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5643 const unsigned NumOperands = MD.getNumOperands();
5644 if (NumOperands == 0)
5645 return &MD; // Invalid, punt to a verifier error.
5646
5647 // Check if the tag uses struct-path aware TBAA format.
5648 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5649 return &MD;
5650
5651 auto &Context = MD.getContext();
5652 if (NumOperands == 3) {
5653 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5654 MDNode *ScalarType = MDNode::get(Context, Elts);
5655 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5656 Metadata *Elts2[] = {ScalarType, ScalarType,
5657 ConstantAsMetadata::get(
5658 Constant::getNullValue(Type::getInt64Ty(Context))),
5659 MD.getOperand(2)};
5660 return MDNode::get(Context, Elts2);
5661 }
5662 // Create a MDNode <MD, MD, offset 0>
5663 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
5664 Type::getInt64Ty(Context)))};
5665 return MDNode::get(Context, Elts);
5666}
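// e.g. an old scalar TBAA tag !{!"int", !1} is wrapped into the struct-path
// form !{!0, !0, i64 0} with !0 = !{!"int", !1}, making the access type its
// own base type at offset 0.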
5667
5668 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5669 Instruction *&Temp) {
5670 if (Opc != Instruction::BitCast)
5671 return nullptr;
5672
5673 Temp = nullptr;
5674 Type *SrcTy = V->getType();
5675 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5676 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5677 LLVMContext &Context = V->getContext();
5678
5679 // We have no information about target data layout, so we assume that
5680 // the maximum pointer size is 64bit.
5681 Type *MidTy = Type::getInt64Ty(Context);
5682 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5683
5684 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5685 }
5686
5687 return nullptr;
5688}
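// e.g. bitcast ptr addrspace(1) %p to ptr is upgraded to the pair
//   %tmp = ptrtoint ptr addrspace(1) %p to i64
//   inttoptr i64 %tmp to ptr
// since a bitcast cannot change address spaces.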
5689
5690 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5691 if (Opc != Instruction::BitCast)
5692 return nullptr;
5693
5694 Type *SrcTy = C->getType();
5695 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5696 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5697 LLVMContext &Context = C->getContext();
5698
5699 // We have no information about target data layout, so we assume that
5700 // the maximum pointer size is 64bit.
5701 Type *MidTy = Type::getInt64Ty(Context);
5702
5703 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
5704 DestTy);
5705 }
5706
5707 return nullptr;
5708}
5709
5710/// Check the debug info version number, if it is out-dated, drop the debug
5711/// info. Return true if module is modified.
5712 bool llvm::UpgradeDebugInfo(Module &M) {
5713 if (DisableAutoUpgradeDebugInfo)
5714 return false;
5715
5716 llvm::TimeTraceScope timeScope("Upgrade debug info");
5717 // We need to get metadata before the module is verified (i.e., getModuleFlag
5718 // makes assumptions that we haven't verified yet). Carefully extract the flag
5719 // from the metadata.
5720 unsigned Version = 0;
5721 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5722 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5723 if (Flag->getNumOperands() < 3)
5724 return false;
5725 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5726 return K->getString() == "Debug Info Version";
5727 return false;
5728 });
5729 if (OpIt != ModFlags->op_end()) {
5730 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5731 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5732 Version = CI->getZExtValue();
5733 }
5734 }
5735
5736 if (Version == DEBUG_METADATA_VERSION) {
5737 bool BrokenDebugInfo = false;
5738 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5739 report_fatal_error("Broken module found, compilation aborted!");
5740 if (!BrokenDebugInfo)
5741 // Everything is ok.
5742 return false;
5743 else {
5744 // Diagnose malformed debug info.
5745 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5746 M.getContext().diagnose(Diag);
5747 }
5748 }
5749 bool Modified = StripDebugInfo(M);
5750 if (Modified && Version != DEBUG_METADATA_VERSION) {
5751 // Diagnose a version mismatch.
5752 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5753 M.getContext().diagnose(DiagVersion);
5754 }
5755 return Modified;
5756}
5757
5758static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5759 GlobalValue *GV, const Metadata *V) {
5760 Function *F = cast<Function>(GV);
5761
5762 constexpr StringLiteral DefaultValue = "1";
5763 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5764 unsigned Length = 0;
5765
5766 if (F->hasFnAttribute(Attr)) {
5767 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5768 // parse these elements placing them into Vect3
5769 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5770 for (; Length < 3 && !S.empty(); Length++) {
5771 auto [Part, Rest] = S.split(',');
5772 Vect3[Length] = Part.trim();
5773 S = Rest;
5774 }
5775 }
5776
5777 const unsigned Dim = DimC - 'x';
5778 assert(Dim < 3 && "Unexpected dim char");
5779
5780 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5781
5782 // local variable required for StringRef in Vect3 to point to.
5783 const std::string VStr = llvm::utostr(VInt);
5784 Vect3[Dim] = VStr;
5785 Length = std::max(Length, Dim + 1);
5786
5787 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5788 F->addFnAttr(Attr, NewAttr);
5789}
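// e.g. the annotation !{ptr @f, !"maxntidy", i32 8} on a function already
// carrying "nvvm.maxntid"="32" merges into "nvvm.maxntid"="32,8".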
5790
5791static inline bool isXYZ(StringRef S) {
5792 return S == "x" || S == "y" || S == "z";
5793}
5794
5795 static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5796 const Metadata *V) {
5797 if (K == "kernel") {
5798 if (!mdconst::extract<ConstantInt>(V)->isZero())
5799 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5800 return true;
5801 }
5802 if (K == "align") {
5803 // V is a bitfield specifying two 16-bit values. The alignment value is
5804 // specified in the low 16 bits; the index is specified in the high bits. For the
5805 // index, 0 indicates the return value while higher values correspond to
5806 // each parameter (idx = param + 1).
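 // e.g. V = 0x10008 encodes index 1 (the first parameter) and an alignment
 // of 8, producing alignstack(8) on that parameter.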
5807 const uint64_t AlignIdxValuePair =
5808 mdconst::extract<ConstantInt>(V)->getZExtValue();
5809 const unsigned Idx = (AlignIdxValuePair >> 16);
5810 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5811 cast<Function>(GV)->addAttributeAtIndex(
5812 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5813 return true;
5814 }
5815 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5816 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5817 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5818 return true;
5819 }
5820 if (K == "minctasm") {
5821 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5822 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5823 return true;
5824 }
5825 if (K == "maxnreg") {
5826 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5827 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5828 return true;
5829 }
5830 if (K.consume_front("maxntid") && isXYZ(K)) {
5831 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5832 return true;
5833 }
5834 if (K.consume_front("reqntid") && isXYZ(K)) {
5835 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5836 return true;
5837 }
5838 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5839 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5840 return true;
5841 }
5842 if (K == "grid_constant") {
5843 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5844 for (const auto &Op : cast<MDNode>(V)->operands()) {
5845 // For some reason, the index is 1-based in the metadata. Good thing we're
5846 // able to auto-upgrade it!
5847 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5848 cast<Function>(GV)->addParamAttr(Index, Attr);
5849 }
5850 return true;
5851 }
5852
5853 return false;
5854}
5855
5856 void llvm::UpgradeNVVMAnnotations(Module &M) {
5857 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5858 if (!NamedMD)
5859 return;
5860
5861 SmallVector<MDNode *, 8> NewNodes;
5862 SmallPtrSet<const MDNode *, 8> SeenNodes;
5863 for (MDNode *MD : NamedMD->operands()) {
5864 if (!SeenNodes.insert(MD).second)
5865 continue;
5866
5867 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5868 if (!GV)
5869 continue;
5870
5871 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5872
5873 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5874 // Each nvvm.annotations metadata entry will be of the following form:
5875 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5876 // start index = 1, to skip the global variable key
5877 // increment = 2, to skip the value for each property-value pair
5878 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5879 MDString *K = cast<MDString>(MD->getOperand(j));
5880 const MDOperand &V = MD->getOperand(j + 1);
5881 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5882 if (!Upgraded)
5883 NewOperands.append({K, V});
5884 }
5885
5886 if (NewOperands.size() > 1)
5887 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5888 }
5889
5890 NamedMD->clearOperands();
5891 for (MDNode *N : NewNodes)
5892 NamedMD->addOperand(N);
5893}
5894
5895 /// This checks for the objc retain/release marker which should be upgraded.
5896 /// It returns true if the module is modified.
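/// e.g. a marker value of the form "prefix#suffix" in the named metadata is
/// rewritten to "prefix;suffix" and reattached as a module flag.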
5897 static bool upgradeRetainReleaseMarker(Module &M) {
5898 bool Changed = false;
5899 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5900 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5901 if (ModRetainReleaseMarker) {
5902 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5903 if (Op) {
5904 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5905 if (ID) {
5906 SmallVector<StringRef, 4> ValueComp;
5907 ID->getString().split(ValueComp, "#");
5908 if (ValueComp.size() == 2) {
5909 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5910 ID = MDString::get(M.getContext(), NewValue);
5911 }
5912 M.addModuleFlag(Module::Error, MarkerKey, ID);
5913 M.eraseNamedMetadata(ModRetainReleaseMarker);
5914 Changed = true;
5915 }
5916 }
5917 }
5918 return Changed;
5919}
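// Minimal example of this rewrite (hypothetical marker string): the named
// metadata
//   !clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
//   !0 = !{!"mov\09fp, fp#MARKER"}
// is replaced by an Error-behavior module flag whose value is the same string
// with '#' rewritten to ';', i.e. "mov\09fp, fp;MARKER".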
5920
5921void llvm::UpgradeARCRuntime(Module &M) {
5922 // This lambda converts normal function calls to ARC runtime functions to
5923 // intrinsic calls.
5924 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5925 llvm::Intrinsic::ID IntrinsicFunc) {
5926 Function *Fn = M.getFunction(OldFunc);
5927
5928 if (!Fn)
5929 return;
5930
5931 Function *NewFn =
5932 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5933
5934 for (User *U : make_early_inc_range(Fn->users())) {
5935 CallInst *CI = dyn_cast<CallInst>(U);
5936 if (!CI || CI->getCalledFunction() != Fn)
5937 continue;
5938
5939 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5940 FunctionType *NewFuncTy = NewFn->getFunctionType();
5941 SmallVector<Value *, 4> Args;
5942
5943 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5944 // value to the return type of the old function.
5945 if (NewFuncTy->getReturnType() != CI->getType() &&
5946 !CastInst::castIsValid(Instruction::BitCast, CI,
5947 NewFuncTy->getReturnType()))
5948 continue;
5949
5950 bool InvalidCast = false;
5951
5952 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5953 Value *Arg = CI->getArgOperand(I);
5954
5955 // Bitcast argument to the parameter type of the new function if it's
5956 // not a variadic argument.
5957 if (I < NewFuncTy->getNumParams()) {
5958 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5959 // to the parameter type of the new function.
5960 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5961 NewFuncTy->getParamType(I))) {
5962 InvalidCast = true;
5963 break;
5964 }
5965 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5966 }
5967 Args.push_back(Arg);
5968 }
5969
5970 if (InvalidCast)
5971 continue;
5972
5973 // Create a call instruction that calls the new function.
5974 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5975 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5976 NewCall->takeName(CI);
5977
5978 // Bitcast the return value back to the type of the old call.
5979 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5980
5981 if (!CI->use_empty())
5982 CI->replaceAllUsesWith(NewRetVal);
5983 CI->eraseFromParent();
5984 }
5985
5986 if (Fn->use_empty())
5987 Fn->eraseFromParent();
5988 };
5989
5990 // Unconditionally convert a call to "clang.arc.use" to a call to
5991 // "llvm.objc.clang.arc.use".
5992 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5993
5994 // Upgrade the retain/release marker. If the marker needs no upgrade, the
5995 // module either is already new enough to contain the new intrinsics or is
5996 // not ARC; in both cases the runtime calls need no upgrade either.
5997 if (!upgradeRetainReleaseMarker(M))
5998 return;
5999
6000 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6001 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6002 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6003 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6004 {"objc_autoreleaseReturnValue",
6005 llvm::Intrinsic::objc_autoreleaseReturnValue},
6006 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6007 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6008 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6009 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6010 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6011 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6012 {"objc_release", llvm::Intrinsic::objc_release},
6013 {"objc_retain", llvm::Intrinsic::objc_retain},
6014 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6015 {"objc_retainAutoreleaseReturnValue",
6016 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6017 {"objc_retainAutoreleasedReturnValue",
6018 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6019 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6020 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6021 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6022 {"objc_unsafeClaimAutoreleasedReturnValue",
6023 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6024 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6025 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6026 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6027 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6028 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6029 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6030 {"objc_arc_annotation_topdown_bbstart",
6031 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6032 {"objc_arc_annotation_topdown_bbend",
6033 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6034 {"objc_arc_annotation_bottomup_bbstart",
6035 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6036 {"objc_arc_annotation_bottomup_bbend",
6037 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6038
6039 for (auto &I : RuntimeFuncs)
6040 UpgradeToIntrinsic(I.first, I.second);
6041}
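// Sketch of the overall effect: a module that calls the ARC runtime directly,
//   declare ptr @objc_retain(ptr)
//   %0 = tail call ptr @objc_retain(ptr %obj)
// is rewritten to use the intrinsic,
//   %0 = tail call ptr @llvm.objc.retain(ptr %obj)
// with bitcasts inserted where argument or return types differ, and the now
// unused @objc_retain declaration erased.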
6042
6043bool llvm::UpgradeModuleFlags(Module &M) {
6044 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6045 if (!ModFlags)
6046 return false;
6047
6048 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6049 bool HasSwiftVersionFlag = false;
6050 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6051 uint32_t SwiftABIVersion;
6052 auto Int8Ty = Type::getInt8Ty(M.getContext());
6053 auto Int32Ty = Type::getInt32Ty(M.getContext());
6054
6055 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6056 MDNode *Op = ModFlags->getOperand(I);
6057 if (Op->getNumOperands() != 3)
6058 continue;
6059 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6060 if (!ID)
6061 continue;
6062 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6063 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6064 Type::getInt32Ty(M.getContext()), B)),
6065 MDString::get(M.getContext(), ID->getString()),
6066 Op->getOperand(2)};
6067 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6068 Changed = true;
6069 };
6070
6071 if (ID->getString() == "Objective-C Image Info Version")
6072 HasObjCFlag = true;
6073 if (ID->getString() == "Objective-C Class Properties")
6074 HasClassProperties = true;
6075 // Upgrade PIC from Error/Max to Min.
6076 if (ID->getString() == "PIC Level") {
6077 if (auto *Behavior =
6078 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
6079 uint64_t V = Behavior->getLimitedValue();
6080 if (V == Module::Error || V == Module::Max)
6081 SetBehavior(Module::Min);
6082 }
6083 }
6084 // Upgrade "PIE Level" from Error to Max.
6085 if (ID->getString() == "PIE Level")
6086 if (auto *Behavior =
6087 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
6088 if (Behavior->getLimitedValue() == Module::Error)
6089 SetBehavior(Module::Max);
6090
6091 // Upgrade branch protection and return address signing module flags. The
6092 // module flag behavior for these fields was Error and is now Min.
6093 if (ID->getString() == "branch-target-enforcement" ||
6094 ID->getString().starts_with("sign-return-address")) {
6095 if (auto *Behavior =
6096 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
6097 if (Behavior->getLimitedValue() == Module::Error) {
6098 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6099 Metadata *Ops[3] = {
6100 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6101 Op->getOperand(1), Op->getOperand(2)};
6102 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6103 Changed = true;
6104 }
6105 }
6106 }
6107
6108 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
6109 // section name so that llvm-lto will not complain about mismatching
6110 // module flags that are functionally the same.
6111 if (ID->getString() == "Objective-C Image Info Section") {
6112 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6113 SmallVector<StringRef, 4> ValueComp;
6114 Value->getString().split(ValueComp, " ");
6115 if (ValueComp.size() != 1) {
6116 std::string NewValue;
6117 for (auto &S : ValueComp)
6118 NewValue += S.str();
6119 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6120 MDString::get(M.getContext(), NewValue)};
6121 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6122 Changed = true;
6123 }
6124 }
6125 }
6126
6127 // IRUpgrader turns an i32-typed "Objective-C Garbage Collection" into an
6128 // i8 value. If the higher bits are set, it adds a new module flag for Swift info.
6129 if (ID->getString() == "Objective-C Garbage Collection") {
6130 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6131 if (Md) {
6132 assert(Md->getValue() && "Expected non-empty metadata");
6133 auto Type = Md->getValue()->getType();
6134 if (Type == Int8Ty)
6135 continue;
6136 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6137 if ((Val & 0xff) != Val) {
6138 HasSwiftVersionFlag = true;
6139 SwiftABIVersion = (Val & 0xff00) >> 8;
6140 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6141 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6142 }
6143 Metadata *Ops[3] = {
6144 Op->getOperand(0),
6145 Op->getOperand(1),
6146 ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
6147 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6148 Changed = true;
6149 }
6150 }
6151
6152 if (ID->getString() == "amdgpu_code_object_version") {
6153 Metadata *Ops[3] = {
6154 Op->getOperand(0),
6155 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6156 Op->getOperand(2)};
6157 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6158 Changed = true;
6159 }
6160 }
6161
6162 // "Objective-C Class Properties" is recently added for Objective-C. We
6163 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
6164 // flag of value 0, so we can correclty downgrade this flag when trying to
6165 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6166 // this module flag.
6167 if (HasObjCFlag && !HasClassProperties) {
6168 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6169 (uint32_t)0);
6170 Changed = true;
6171 }
6172
6173 if (HasSwiftVersionFlag) {
6174 M.addModuleFlag(Module::Error, "Swift ABI Version",
6175 SwiftABIVersion);
6176 M.addModuleFlag(Module::Error, "Swift Major Version",
6177 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6178 M.addModuleFlag(Module::Error, "Swift Minor Version",
6179 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6180 Changed = true;
6181 }
6182
6183 return Changed;
6184}
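// Worked example for the Swift bit-packing above (hypothetical value): an
// "Objective-C Garbage Collection" flag carrying the i32 value 0x05020140
// is narrowed to the i8 value 0x40, and three new flags are added:
// Swift ABI Version = 1 (bits 8-15), Swift Minor Version = 2 (bits 16-23),
// Swift Major Version = 5 (bits 24-31).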
6185
6186void llvm::UpgradeSectionAttributes(Module &M) {
6187 auto TrimSpaces = [](StringRef Section) -> std::string {
6188 SmallVector<StringRef, 5> Components;
6189 Section.split(Components, ',');
6190
6191 SmallString<32> Buffer;
6192 raw_svector_ostream OS(Buffer);
6193
6194 for (auto Component : Components)
6195 OS << ',' << Component.trim();
6196
6197 return std::string(OS.str().substr(1));
6198 };
6199
6200 for (auto &GV : M.globals()) {
6201 if (!GV.hasSection())
6202 continue;
6203
6204 StringRef Section = GV.getSection();
6205
6206 if (!Section.starts_with("__DATA, __objc_catlist"))
6207 continue;
6208
6209 // __DATA, __objc_catlist, regular, no_dead_strip
6210 // __DATA,__objc_catlist,regular,no_dead_strip
6211 GV.setSection(TrimSpaces(Section));
6212 }
6213}
6214
6215namespace {
6216// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6217// callsites within a function that did not also have the strictfp attribute.
6218// Since 10.0, if strict FP semantics are needed within a function, the
6219// function must have the strictfp attribute and all calls within the function
6220// must also have the strictfp attribute. This latter restriction is
6221// necessary to prevent unwanted libcall simplification when a function is
6222// being cloned (such as for inlining).
6223//
6224// The "dangling" strictfp attribute usage was only used to prevent constant
6225// folding and other libcall simplification. The nobuiltin attribute on the
6226// callsite has the same effect.
6227struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6228 StrictFPUpgradeVisitor() = default;
6229
6230 void visitCallBase(CallBase &Call) {
6231 if (!Call.isStrictFP())
6232 return;
6233 if (isa<ConstrainedFPIntrinsic>(&Call))
6234 return;
6235 // If we get here, the caller doesn't have the strictfp attribute
6236 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6237 Call.removeFnAttr(Attribute::StrictFP);
6238 Call.addFnAttr(Attribute::NoBuiltin);
6239 }
6240};
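// Sketch: inside a function without the strictfp attribute, a callsite such as
//   %r = call double @sin(double %x) strictfp
// is rewritten by this visitor to
//   %r = call double @sin(double %x) nobuiltin
// keeping libcall simplification suppressed without implying strict semantics.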
6241
6242/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6243struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6244 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6245 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6246
6247 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6248 if (!RMW.isFloatingPointOperation())
6249 return;
6250
6251 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6252 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6253 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6254 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6255 }
6256};
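// Sketch: under "amdgpu-unsafe-fp-atomics"="true", an instruction like
//   %old = atomicrmw fadd ptr %p, float %v seq_cst
// gains the empty-node annotations !amdgpu.no.fine.grained.host.memory,
// !amdgpu.no.remote.memory.access and !amdgpu.ignore.denormal.mode.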
6257} // namespace
6258
6259void llvm::UpgradeFunctionAttributes(Function &F) {
6260 // If a function definition doesn't have the strictfp attribute,
6261 // convert any callsite strictfp attributes to nobuiltin.
6262 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6263 StrictFPUpgradeVisitor SFPV;
6264 SFPV.visit(F);
6265 }
6266
6267 // Remove all incompatible attributes from the function.
6268 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6269 F.getReturnType(), F.getAttributes().getRetAttrs()));
6270 for (auto &Arg : F.args())
6271 Arg.removeAttrs(
6272 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6273
6274 // Older versions of LLVM treated an "implicit-section-name" attribute
6275 // similarly to directly setting the section on a Function.
6276 if (Attribute A = F.getFnAttribute("implicit-section-name");
6277 A.isValid() && A.isStringAttribute()) {
6278 F.setSection(A.getValueAsString());
6279 F.removeFnAttr("implicit-section-name");
6280 }
6281
6282 if (!F.empty()) {
6283 // For some reason this is called twice, and the first time is before any
6284 // instructions are loaded into the body.
6285
6286 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6287 A.isValid()) {
6288
6289 if (A.getValueAsBool()) {
6290 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6291 Visitor.visit(F);
6292 }
6293
6294 // We will leave behind dead attribute uses on external declarations, but
6295 // clang never added these to declarations anyway.
6296 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
6297 }
6298 }
6299}
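// For example (sketch, hypothetical name): a definition carrying
// "implicit-section-name"=".boot" ends up with the explicit section ".boot"
// and without the legacy attribute, as if setSection had been used directly.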
6300
6301// Set the function attribute if it is not already present.
6302static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6303 StringRef Value) {
6304 if (!F.hasFnAttribute(FnAttrName))
6305 F.addFnAttr(FnAttrName, Value);
6306}
6307
6308// Check if the function attribute is not present and set it if needed.
6309// If the attribute is "false", remove it.
6310// If the attribute is "true", reset it to a valueless attribute.
6311static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6312 if (!F.hasFnAttribute(FnAttrName)) {
6313 if (Set)
6314 F.addFnAttr(FnAttrName);
6315 } else {
6316 auto A = F.getFnAttribute(FnAttrName);
6317 if ("false" == A.getValueAsString())
6318 F.removeFnAttr(FnAttrName);
6319 else if ("true" == A.getValueAsString()) {
6320 F.removeFnAttr(FnAttrName);
6321 F.addFnAttr(FnAttrName);
6322 }
6323 }
6324}
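// Sketch of the normalization performed above:
//   "branch-target-enforcement"="true"  -> "branch-target-enforcement"
//   "branch-target-enforcement"="false" -> (attribute removed)
//   attribute absent and Set == true    -> "branch-target-enforcement"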
6325
6326void llvm::copyModuleAttrToFunctions(Module &M) {
6327 Triple T(M.getTargetTriple());
6328 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6329 return;
6330
6331 uint64_t BTEValue = 0;
6332 uint64_t BPPLRValue = 0;
6333 uint64_t GCSValue = 0;
6334 uint64_t SRAValue = 0;
6335 uint64_t SRAALLValue = 0;
6336 uint64_t SRABKeyValue = 0;
6337
6338 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6339 if (ModFlags) {
6340 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6341 MDNode *Op = ModFlags->getOperand(I);
6342 if (Op->getNumOperands() != 3)
6343 continue;
6344
6345 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6346 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6347 if (!ID || !CI)
6348 continue;
6349
6350 StringRef IDStr = ID->getString();
6351 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6352 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6353 : IDStr == "guarded-control-stack" ? &GCSValue
6354 : IDStr == "sign-return-address" ? &SRAValue
6355 : IDStr == "sign-return-address-all" ? &SRAALLValue
6356 : IDStr == "sign-return-address-with-bkey"
6357 ? &SRABKeyValue
6358 : nullptr;
6359 if (!ValPtr)
6360 continue;
6361
6362 *ValPtr = CI->getZExtValue();
6363 if (*ValPtr == 2)
6364 return;
6365 }
6366 }
6367
6368 bool BTE = BTEValue == 1;
6369 bool BPPLR = BPPLRValue == 1;
6370 bool GCS = GCSValue == 1;
6371 bool SRA = SRAValue == 1;
6372
6373 StringRef SignTypeValue = "non-leaf";
6374 if (SRA && SRAALLValue == 1)
6375 SignTypeValue = "all";
6376
6377 StringRef SignKeyValue = "a_key";
6378 if (SRA && SRABKeyValue == 1)
6379 SignKeyValue = "b_key";
6380
6381 for (Function &F : M.getFunctionList()) {
6382 if (F.isDeclaration())
6383 continue;
6384
6385 if (SRA) {
6386 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6387 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6388 } else {
6389 if (auto A = F.getFnAttribute("sign-return-address");
6390 A.isValid() && "none" == A.getValueAsString()) {
6391 F.removeFnAttr("sign-return-address");
6392 F.removeFnAttr("sign-return-address-key");
6393 }
6394 }
6395 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6396 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6397 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6398 }
6399
6400 if (BTE)
6401 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6402 if (BPPLR)
6403 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6404 if (GCS)
6405 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6406 if (SRA) {
6407 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6408 if (SRAALLValue == 1)
6409 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6410 if (SRABKeyValue == 1)
6411 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6412 }
6413}
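// Example (sketch): module flags sign-return-address = 1 and
// sign-return-address-all = 1 give every function definition
// "sign-return-address"="all" and "sign-return-address-key"="a_key" (unless
// already set), after which the module flags are reset to the sentinel value
// 2, which marks a module whose attributes were already copied.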
6414
6415static bool isOldLoopArgument(Metadata *MD) {
6416 auto *T = dyn_cast_or_null<MDTuple>(MD);
6417 if (!T)
6418 return false;
6419 if (T->getNumOperands() < 1)
6420 return false;
6421 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6422 if (!S)
6423 return false;
6424 return S->getString().starts_with("llvm.vectorizer.");
6425}
6426
6427static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6428 StringRef OldPrefix = "llvm.vectorizer.";
6429 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6430
6431 if (OldTag == "llvm.vectorizer.unroll")
6432 return MDString::get(C, "llvm.loop.interleave.count");
6433
6434 return MDString::get(
6435 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6436 .str());
6437}
6438
6439static Metadata *upgradeLoopArgument(Metadata *MD) {
6440 auto *T = dyn_cast_or_null<MDTuple>(MD);
6441 if (!T)
6442 return MD;
6443 if (T->getNumOperands() < 1)
6444 return MD;
6445 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6446 if (!OldTag)
6447 return MD;
6448 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6449 return MD;
6450
6451 // This has an old tag. Upgrade it.
6452 SmallVector<Metadata *, 8> Ops;
6453 Ops.reserve(T->getNumOperands());
6454 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6455 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6456 Ops.push_back(T->getOperand(I));
6457
6458 return MDTuple::get(T->getContext(), Ops);
6459}
6460
6461MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6462 auto *T = dyn_cast<MDTuple>(&N);
6463 if (!T)
6464 return &N;
6465
6466 if (none_of(T->operands(), isOldLoopArgument))
6467 return &N;
6468
6469 SmallVector<Metadata *, 8> Ops;
6470 Ops.reserve(T->getNumOperands());
6471 for (Metadata *MD : T->operands())
6472 Ops.push_back(upgradeLoopArgument(MD));
6473
6474 return MDTuple::get(T->getContext(), Ops);
6475}
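// Example (sketch) of the loop-metadata upgrade implemented above:
//   !0 = distinct !{!0, !1, !2}
//   !1 = !{!"llvm.vectorizer.width", i32 4}
//   !2 = !{!"llvm.vectorizer.unroll", i32 2}
// is rewritten so the arguments read
//   !{!"llvm.loop.vectorize.width", i32 4} and
//   !{!"llvm.loop.interleave.count", i32 2}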
6476
6477std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
6478 Triple T(TT);
6479 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6480 // the address space of globals to 1. This does not apply to SPIRV Logical.
6481 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6482 !DL.contains("-G") && !DL.starts_with("G")) {
6483 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6484 }
6485
6486 if (T.isLoongArch64() || T.isRISCV64()) {
6487 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6488 auto I = DL.find("-n64-");
6489 if (I != StringRef::npos)
6490 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6491 return DL.str();
6492 }
6493
6494 // AMDGPU data layout upgrades.
6495 std::string Res = DL.str();
6496 if (T.isAMDGPU()) {
6497 // Define address spaces for constants.
6498 if (!DL.contains("-G") && !DL.starts_with("G"))
6499 Res.append(Res.empty() ? "G1" : "-G1");
6500
6501 // AMDGCN data layout upgrades.
6502 if (T.isAMDGCN()) {
6503
6504 // Add missing non-integral declarations.
6505 // This goes before adding new address spaces to prevent incoherent string
6506 // values.
6507 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6508 Res.append("-ni:7:8:9");
6509 // Update ni:7 to ni:7:8:9.
6510 if (DL.ends_with("ni:7"))
6511 Res.append(":8:9");
6512 if (DL.ends_with("ni:7:8"))
6513 Res.append(":9");
6514
6515 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6516 // resources). An empty data layout has already been upgraded to G1 by now.
6517 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6518 Res.append("-p7:160:256:256:32");
6519 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6520 Res.append("-p8:128:128:128:48");
6521 constexpr StringRef OldP8("-p8:128:128-");
6522 if (DL.contains(OldP8))
6523 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6524 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6525 Res.append("-p9:192:256:256:32");
6526 }
6527
6528 // Upgrade the ELF mangling mode.
6529 if (!DL.contains("m:e"))
6530 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6531
6532 return Res;
6533 }
6534
6535 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6536 // If the datalayout matches the expected format, add pointer size address
6537 // spaces to the datalayout.
6538 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6539 if (!DL.contains(AddrSpaces)) {
6540 SmallVector<StringRef, 4> Groups;
6541 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6542 if (R.match(Res, &Groups))
6543 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6544 }
6545 };
6546
6547 // AArch64 data layout upgrades.
6548 if (T.isAArch64()) {
6549 // Add "-Fn32"
6550 if (!DL.empty() && !DL.contains("-Fn32"))
6551 Res.append("-Fn32");
6552 AddPtr32Ptr64AddrSpaces();
6553 return Res;
6554 }
6555
6556 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6557 T.isWasm()) {
6558 // Mips64 with o32 ABI did not add "-i128:128".
6559 // Add "-i128:128"
6560 std::string I64 = "-i64:64";
6561 std::string I128 = "-i128:128";
6562 if (!StringRef(Res).contains(I128)) {
6563 size_t Pos = Res.find(I64);
6564 if (Pos != size_t(-1))
6565 Res.insert(Pos + I64.size(), I128);
6566 }
6567 }
6568
6569 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6570 size_t Pos = Res.find("-S128");
6571 if (Pos == StringRef::npos)
6572 Pos = Res.size();
6573 Res.insert(Pos, "-f64:32:64");
6574 }
6575
6576 if (!T.isX86())
6577 return Res;
6578
6579 AddPtr32Ptr64AddrSpaces();
6580
6581 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6582 // for i128 operations prior to this being reflected in the data layout, and
6583 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6584 // boundaries, so although this is a breaking change, the upgrade is expected
6585 // to fix more IR than it breaks.
6586 // Intel MCU is an exception and uses 4-byte-alignment.
6587 if (!T.isOSIAMCU()) {
6588 std::string I128 = "-i128:128";
6589 if (StringRef Ref = Res; !Ref.contains(I128)) {
6590 SmallVector<StringRef, 4> Groups;
6591 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6592 if (R.match(Res, &Groups))
6593 Res = (Groups[1] + I128 + Groups[3]).str();
6594 }
6595 }
6596
6597 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6598 // Raising the alignment is safe because Clang did not produce f80 values in
6599 // the MSVC environment before this upgrade was added.
6600 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6601 StringRef Ref = Res;
6602 auto I = Ref.find("-f80:32-");
6603 if (I != StringRef::npos)
6604 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6605 }
6606
6607 return Res;
6608}
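// Worked x86-64 example (sketch, simplified layout): the string
//   e-m:e-i64:64-f80:128-n8:16:32:64-S128
// first gains the mixed pointer-size address spaces and then the i128
// alignment, producing
//   e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128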
6609
6610void llvm::UpgradeAttributes(AttrBuilder &B) {
6611 StringRef FramePointer;
6612 Attribute A = B.getAttribute("no-frame-pointer-elim");
6613 if (A.isValid()) {
6614 // The value can be "true" or "false".
6615 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6616 B.removeAttribute("no-frame-pointer-elim");
6617 }
6618 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6619 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6620 if (FramePointer != "all")
6621 FramePointer = "non-leaf";
6622 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6623 }
6624 if (!FramePointer.empty())
6625 B.addAttribute("frame-pointer", FramePointer);
6626
6627 A = B.getAttribute("null-pointer-is-valid");
6628 if (A.isValid()) {
6629 // The value can be "true" or "false".
6630 bool NullPointerIsValid = A.getValueAsString() == "true";
6631 B.removeAttribute("null-pointer-is-valid");
6632 if (NullPointerIsValid)
6633 B.addAttribute(Attribute::NullPointerIsValid);
6634 }
6635}
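// Sketch of the frame-pointer upgrade:
//   "no-frame-pointer-elim"="true"   -> "frame-pointer"="all"
//   "no-frame-pointer-elim"="false"  -> "frame-pointer"="none"
//   "no-frame-pointer-elim-non-leaf" -> "frame-pointer"="non-leaf",
//                                       unless "all" already won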
6636
6637void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6638 // clang.arc.attachedcall bundles are now required to have an operand.
6639 // If they don't, it's okay to drop them entirely: when there is an operand,
6640 // the "attachedcall" is meaningful and required, but without an operand,
6641 // it's just a marker NOP. Dropping it merely prevents an optimization.
6642 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6643 return OBD.getTag() == "clang.arc.attachedcall" &&
6644 OBD.inputs().empty();
6645 });
6646}
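// E.g. (sketch): the bundle in
//   call void @f() [ "clang.arc.attachedcall"() ]
// is dropped as a mere marker, while an operand-carrying bundle such as
//   call ptr @g() [ "clang.arc.attachedcall"(ptr @llvm.objc.retainAutoreleasedReturnValue) ]
// is kept intact.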