//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

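// As an illustrative example of what "auto-upgrade" means in practice:
// bitcode that uses the obsolete one-argument form
//
//   %r = call i32 @llvm.ctlz.i32(i32 %x)
//
// is rewritten into the current two-argument form
//
//   %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
//
// by the declaration/call upgrade helpers below (see the 'c' case in
// upgradeIntrinsicFunction1), so IR produced by older releases keeps loading.
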
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include "llvm/TargetParser/Triple.h"
#include <cstring>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
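
// For example, rename() turns the declaration "llvm.x86.sse41.ptestc" into
// "llvm.x86.sse41.ptestc.old", freeing the original name for a fresh
// declaration with the updated signature; calls to the ".old" function are
// then rewritten in UpgradeIntrinsicCall.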

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments
// have changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old; replace it with the new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declaration of FP compare intrinsics that change their return
// type from a scalar to a vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // If the return type is already a vector, this is the new version.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with the LLVM version
  // that started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    // Added in 7.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'.
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic functions. Returns true
// if an upgrade was needed. IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::thread_pointer);
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are no longer polymorphic
    // and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to AArch64 Neon or Arm Neon.
  }
  // Continue on to Arm or AArch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'.
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'.
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" ||
               Name == "2qa" || Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }
      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("abs."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_abs_bf16)
        .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'.
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'.
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords &&
          F->getParent()->IsNewDbgInfoFormat) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group.
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to the new form that embeds the alignment as an
    // attribute on the pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len.
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len.
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // Len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll}
        Expand = Name == "i" || Name == "ll";
      else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
        Expand = true;
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load.add."))
        // nvvm.atomic.load.add.{f32.p,f64.p}
        Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant}
        Expand = Name.starts_with("local") || Name.starts_with("shared") ||
                 Name.starts_with("global") || Name.starts_with("constant");
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant}.to.gen
        Expand =
            (Name.consume_front("local") || Name.consume_front("shared") ||
             Name.consume_front("global") || Name.consume_front("constant")) &&
            Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = false;

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() !=
              Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

  case 'r': {
    if (Name.consume_front("riscv.")) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
               .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
               .Case("aes32esi", Intrinsic::riscv_aes32esi)
               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }
      break; // No other 'riscv.*' intrinsics.
    }
  } break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::var_annotation,
          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
      return true;
    }
    break;
  }

  case 'w':
    if (Name.consume_front("wasm.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
              .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->getReturnType());
        return true;
      }

      if (Name.consume_front("dot.i8x16.i7x16.")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                 .Case("add.signed",
                       Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
                 .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other 'wasm.dot.i8x16.i7x16.*'.
      }
      break; // No other 'wasm.*'.
    }
    break;

  case 'x':
    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }

  auto *ST = dyn_cast<StructType>(F->getReturnType());
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
      F->getIntrinsicID() != Intrinsic::not_intrinsic) {
    // Replace return type with literal non-packed struct. Only do this for
    // intrinsics declared to return a struct, not for intrinsics with
    // overloaded return type, in which case the exact struct type will be
    // mangled into the name.
    SmallVector<Intrinsic::IITDescriptor> Desc;
    Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
    if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
      auto *FT = F->getFunctionType();
      auto *NewST = StructType::get(ST->getContext(), ST->elements());
      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
      std::string Name = F->getName().str();
      rename(F);
      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
                               Name, F->getParent());

      // The new function may also need remangling.
      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
        NewFn = *Result;
      return true;
    }
  }

  // Remangle our intrinsic since we upgrade the mangling.
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != std::nullopt) {
    NewFn = *Result;
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
                                    bool CanUpgradeDebugIntrinsicsToRecords) {
  NewFn = nullptr;
  bool Upgraded =
      upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getPtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
                                      Ctor->getAggregateElement(1),
                                      Constant::getNullValue(IRB.getPtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}
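
// Illustrative example (not from the source itself): a two-field
// @llvm.global_ctors entry such as
//   { i32 65535, ptr @ctor }
// is rebuilt above as the current three-field form
//   { i32 65535, ptr @ctor, ptr null }
// with a null associated-data pointer appended as the third member.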

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // The 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // End of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
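
// Worked example for the routine above: with a 128-bit operand (NumElts ==
// 16) and Shift == 4, the shuffle indices are 12..15 (bytes of the zero
// vector) followed by 16..27 (bytes 0..11 of Op), so the bytes move up four
// positions and zeroes fill the vacated low bytes, i.e. a 4-byte left shift.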

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // The 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // End of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
1608
1609static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1610 unsigned NumElts) {
1611 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1613 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1614 Mask = Builder.CreateBitCast(Mask, MaskTy);
1615
1616 // If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
1617 // i8 and we need to extract down to the right number of elements.
1618 if (NumElts <= 4) {
1619 int Indices[4];
1620 for (unsigned i = 0; i != NumElts; ++i)
1621 Indices[i] = i;
1622 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1623 "extract");
1624 }
1625
1626 return Mask;
1627}
1628
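// As a sketch (placeholder value names), selecting between two <8 x float>
// operands under an i8 mask produces:
//   %m = bitcast i8 %mask to <8 x i1>
//   %r = select <8 x i1> %m, <8 x float> %op0, <8 x float> %op1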
1629static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1630 Value *Op1) {
1631 // If the mask is all ones, just emit the first operation.
1632 if (const auto *C = dyn_cast<Constant>(Mask))
1633 if (C->isAllOnesValue())
1634 return Op0;
1635
1636 Mask = getX86MaskVec(Builder, Mask,
1637 cast<FixedVectorType>(Op0->getType())->getNumElements());
1638 return Builder.CreateSelect(Mask, Op0, Op1);
1639}
1640
1641static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1642 Value *Op1) {
1643 // If the mask is all ones, just emit the first operation.
1644 if (const auto *C = dyn_cast<Constant>(Mask))
1645 if (C->isAllOnesValue())
1646 return Op0;
1647
1648 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1649 Mask->getType()->getIntegerBitWidth());
1650 Mask = Builder.CreateBitCast(Mask, MaskTy);
1651 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1652 return Builder.CreateSelect(Mask, Op0, Op1);
1653}
1654
1655// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1656 // PALIGNR handles large immediates by shifting, while VALIGN masks the
1657 // immediate, so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
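// As a sketch (placeholder value names), a 128-bit palignr with an immediate
// of 4 becomes one shuffle taking bytes 4..15 of the second source operand
// followed by bytes 0..3 of the first:
//   %r = shufflevector <16 x i8> %op1, <16 x i8> %op0,
//        <16 x i32> <i32 4, ..., i32 15, i32 16, i32 17, i32 18, i32 19>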
1658 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
1659 Value *Op1, Value *Shift,
1660 Value *Passthru, Value *Mask,
1661 bool IsVALIGN) {
1662 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1663
1664 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1665 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1666 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1667 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1668
1669 // Mask the immediate for VALIGN.
1670 if (IsVALIGN)
1671 ShiftVal &= (NumElts - 1);
1672
1673 // If palignr is shifting the pair of vectors more than the size of two
1674 // lanes, emit zero.
1675 if (ShiftVal >= 32)
1676 return llvm::Constant::getNullValue(Op0->getType());
1677
1678 // If palignr is shifting the pair of input vectors more than one lane,
1679 // but less than two lanes, convert to shifting in zeroes.
1680 if (ShiftVal > 16) {
1681 ShiftVal -= 16;
1682 Op1 = Op0;
1683 Op0 = llvm::Constant::getNullValue(Op0->getType());
1684 }
1685
1686 int Indices[64];
1687 // 256-bit palignr operates on 128-bit lanes, so we need to handle that.
1688 for (unsigned l = 0; l < NumElts; l += 16) {
1689 for (unsigned i = 0; i != 16; ++i) {
1690 unsigned Idx = ShiftVal + i;
1691 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1692 Idx += NumElts - 16; // End of lane, switch operand.
1693 Indices[l + i] = Idx + l;
1694 }
1695 }
1696
1697 Value *Align = Builder.CreateShuffleVector(
1698 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1699
1700 return emitX86Select(Builder, Mask, Align, Passthru);
1701}
1702
1703 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
1704 bool ZeroMask, bool IndexForm) {
1705 Type *Ty = CI.getType();
1706 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1707 unsigned EltWidth = Ty->getScalarSizeInBits();
1708 bool IsFloat = Ty->isFPOrFPVectorTy();
1709 Intrinsic::ID IID;
1710 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1711 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1712 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1713 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1714 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1715 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1716 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1717 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1718 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1719 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1720 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1721 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1722 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1723 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1724 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1725 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1726 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1727 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1728 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1729 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1730 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1731 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1732 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1733 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1734 else if (VecWidth == 128 && EltWidth == 16)
1735 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1736 else if (VecWidth == 256 && EltWidth == 16)
1737 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1738 else if (VecWidth == 512 && EltWidth == 16)
1739 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1740 else if (VecWidth == 128 && EltWidth == 8)
1741 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1742 else if (VecWidth == 256 && EltWidth == 8)
1743 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1744 else if (VecWidth == 512 && EltWidth == 8)
1745 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1746 else
1747 llvm_unreachable("Unexpected intrinsic");
1748
1749 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1750 CI.getArgOperand(2) };
1751
1752 // If this isn't index form, we need to swap operands 0 and 1.
1753 if (!IndexForm)
1754 std::swap(Args[0], Args[1]);
1755
1756 Value *V = Builder.CreateIntrinsic(IID, {}, Args);
1757 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1758 : Builder.CreateBitCast(CI.getArgOperand(1),
1759 Ty);
1760 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1761}
1762
1763 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1764 Intrinsic::ID IID) {
1765 Type *Ty = CI.getType();
1766 Value *Op0 = CI.getOperand(0);
1767 Value *Op1 = CI.getOperand(1);
1768 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
1769
1770 if (CI.arg_size() == 4) { // For masked intrinsics.
1771 Value *VecSrc = CI.getOperand(2);
1772 Value *Mask = CI.getOperand(3);
1773 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1774 }
1775 return Res;
1776}
1777
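// As a sketch (placeholder value names), a <4 x i32> rotate-left by an
// immediate of 5 becomes a funnel shift with both value inputs tied to the
// source:
//   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x,
//                                        <4 x i32> splat (i32 5))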
1778 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1779 bool IsRotateRight) {
1780 Type *Ty = CI.getType();
1781 Value *Src = CI.getArgOperand(0);
1782 Value *Amt = CI.getArgOperand(1);
1783
1784 // Amount may be a scalar immediate, in which case create a splat vector.
1785 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
1786 // we only care about the lowest log2 bits anyway.
1787 if (Amt->getType() != Ty) {
1788 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1789 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1790 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1791 }
1792
1793 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1794 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
1795
1796 if (CI.arg_size() == 4) { // For masked intrinsics.
1797 Value *VecSrc = CI.getOperand(2);
1798 Value *Mask = CI.getOperand(3);
1799 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1800 }
1801 return Res;
1802}
1803
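// As a sketch (placeholder value names), a signed XOP vpcom with Imm == 0
// (less-than) on <16 x i8> operands becomes:
//   %c = icmp slt <16 x i8> %lhs, %rhs
//   %r = sext <16 x i1> %c to <16 x i8>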
1804static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1805 bool IsSigned) {
1806 Type *Ty = CI.getType();
1807 Value *LHS = CI.getArgOperand(0);
1808 Value *RHS = CI.getArgOperand(1);
1809
1810 CmpInst::Predicate Pred;
1811 switch (Imm) {
1812 case 0x0:
1813 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1814 break;
1815 case 0x1:
1816 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1817 break;
1818 case 0x2:
1819 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1820 break;
1821 case 0x3:
1822 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1823 break;
1824 case 0x4:
1825 Pred = ICmpInst::ICMP_EQ;
1826 break;
1827 case 0x5:
1828 Pred = ICmpInst::ICMP_NE;
1829 break;
1830 case 0x6:
1831 return Constant::getNullValue(Ty); // FALSE
1832 case 0x7:
1833 return Constant::getAllOnesValue(Ty); // TRUE
1834 default:
1835 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1836 }
1837
1838 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1839 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1840 return Ext;
1841}
1842
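// As a sketch (placeholder value names), an unmasked <4 x i32> vpshld with an
// immediate of 7 becomes:
//   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b,
//                                        <4 x i32> splat (i32 7))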
1843 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1844 bool IsShiftRight, bool ZeroMask) {
1845 Type *Ty = CI.getType();
1846 Value *Op0 = CI.getArgOperand(0);
1847 Value *Op1 = CI.getArgOperand(1);
1848 Value *Amt = CI.getArgOperand(2);
1849
1850 if (IsShiftRight)
1851 std::swap(Op0, Op1);
1852
1853 // Amount may be a scalar immediate, in which case create a splat vector.
1854 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
1855 // we only care about the lowest log2 bits anyway.
1856 if (Amt->getType() != Ty) {
1857 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1858 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1859 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1860 }
1861
1862 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1863 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
1864
1865 unsigned NumArgs = CI.arg_size();
1866 if (NumArgs >= 4) { // For masked intrinsics.
1867 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1868 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1869 CI.getArgOperand(0);
1870 Value *Mask = CI.getOperand(NumArgs - 1);
1871 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1872 }
1873 return Res;
1874}
1875
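// As a sketch (placeholder value names), a masked store of <4 x float> under
// an i8 mask becomes:
//   %m = <4 x i1> extracted from the i8 mask (see getX86MaskVec)
//   call void @llvm.masked.store.v4f32.p0(<4 x float> %data, ptr %ptr,
//                                         i32 1, <4 x i1> %m)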
1876 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
1877 Value *Mask, bool Aligned) {
1878 const Align Alignment =
1879 Aligned
1880 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1881 : Align(1);
1882
1883 // If the mask is all ones, just emit a regular store.
1884 if (const auto *C = dyn_cast<Constant>(Mask))
1885 if (C->isAllOnesValue())
1886 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1887
1888 // Convert the mask from an integer type to a vector of i1.
1889 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1890 Mask = getX86MaskVec(Builder, Mask, NumElts);
1891 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1892}
1893
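// As a sketch (placeholder value names), a masked load of <4 x float> under
// an i8 mask becomes:
//   %m = <4 x i1> extracted from the i8 mask (see getX86MaskVec)
//   %r = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %ptr, i32 1,
//                                                    <4 x i1> %m,
//                                                    <4 x float> %passthru)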
1894 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
1895 Value *Passthru, Value *Mask, bool Aligned) {
1896 Type *ValTy = Passthru->getType();
1897 const Align Alignment =
1898 Aligned
1899 ? Align(
1900 ValTy->getPrimitiveSizeInBits().getFixedValue() /
1901 8)
1902 : Align(1);
1903
1904 // If the mask is all ones, just emit a regular load.
1905 if (const auto *C = dyn_cast<Constant>(Mask))
1906 if (C->isAllOnesValue())
1907 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1908
1909 // Convert the mask from an integer type to a vector of i1.
1910 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1911 Mask = getX86MaskVec(Builder, Mask, NumElts);
1912 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1913}
1914
1915static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1916 Type *Ty = CI.getType();
1917 Value *Op0 = CI.getArgOperand(0);
1918 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
1919 {Op0, Builder.getInt1(false)});
1920 if (CI.arg_size() == 3)
1921 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1922 return Res;
1923}
1924
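// As a sketch (placeholder value names), the unsigned <4 x i32> form becomes:
//   %la = bitcast <4 x i32> %a to <2 x i64>
//   %lb = bitcast <4 x i32> %b to <2 x i64>
//   %ma = and <2 x i64> %la, splat (i64 4294967295)
//   %mb = and <2 x i64> %lb, splat (i64 4294967295)
//   %r = mul <2 x i64> %ma, %mb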
1925static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1926 Type *Ty = CI.getType();
1927
1928 // Arguments have a vXi32 type so cast to vXi64.
1929 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1930 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1931
1932 if (IsSigned) {
1933 // Shift left then arithmetic shift right.
1934 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1935 LHS = Builder.CreateShl(LHS, ShiftAmt);
1936 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1937 RHS = Builder.CreateShl(RHS, ShiftAmt);
1938 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1939 } else {
1940 // Clear the upper bits.
1941 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1942 LHS = Builder.CreateAnd(LHS, Mask);
1943 RHS = Builder.CreateAnd(RHS, Mask);
1944 }
1945
1946 Value *Res = Builder.CreateMul(LHS, RHS);
1947
1948 if (CI.arg_size() == 4)
1949 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1950
1951 return Res;
1952}
1953
1954 // Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
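// As a sketch (placeholder value names), for a <4 x i1> compare result and an
// i8 mask (indices 4..7 pull zeroes from the second shuffle operand):
//   %and = and <4 x i1> %vec, %maskvec
//   %pad = shufflevector <4 x i1> %and, <4 x i1> zeroinitializer,
//          <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
//   %r = bitcast <8 x i1> %pad to i8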
1955 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1956 Value *Mask) {
1957 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1958 if (Mask) {
1959 const auto *C = dyn_cast<Constant>(Mask);
1960 if (!C || !C->isAllOnesValue())
1961 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1962 }
1963
1964 if (NumElts < 8) {
1965 int Indices[8];
1966 for (unsigned i = 0; i != NumElts; ++i)
1967 Indices[i] = i;
1968 for (unsigned i = NumElts; i != 8; ++i)
1969 Indices[i] = NumElts + i % NumElts;
1970 Vec = Builder.CreateShuffleVector(Vec,
1971 Constant::getNullValue(Vec->getType()),
1972 Indices);
1973 }
1974 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1975}
1976
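// As a sketch (placeholder value names), a masked signed greater-than compare
// (CC == 6) on <4 x i32> operands becomes an icmp whose <4 x i1> result is
// and'ed with the mask and widened to i8 by applyX86MaskOn1BitsVec:
//   %c = icmp sgt <4 x i32> %a, %b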
1977 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1978 unsigned CC, bool Signed) {
1979 Value *Op0 = CI.getArgOperand(0);
1980 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1981
1982 Value *Cmp;
1983 if (CC == 3) {
1984 Cmp = Constant::getNullValue(
1985 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1986 } else if (CC == 7) {
1987 Cmp = Constant::getAllOnesValue(
1988 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1989 } else {
1990 ICmpInst::Predicate Pred;
1991 switch (CC) {
1992 default: llvm_unreachable("Unknown condition code");
1993 case 0: Pred = ICmpInst::ICMP_EQ; break;
1994 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1995 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1996 case 4: Pred = ICmpInst::ICMP_NE; break;
1997 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1998 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1999 }
2000 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2001 }
2002
2003 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2004
2005 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2006}
2007
2008// Replace a masked intrinsic with an older unmasked intrinsic.
2009 static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2010 Intrinsic::ID IID) {
2011 Value *Rep = Builder.CreateIntrinsic(
2012 IID, {}, {CI.getArgOperand(0), CI.getArgOperand(1)});
2013 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2014}
2015
2016 static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2017 Value* A = CI.getArgOperand(0);
2018 Value* B = CI.getArgOperand(1);
2019 Value* Src = CI.getArgOperand(2);
2020 Value* Mask = CI.getArgOperand(3);
2021
2022 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2023 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2024 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2025 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2026 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2027 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2028}
2029
2030 static Value* upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2031 Value* Op = CI.getArgOperand(0);
2032 Type* ReturnOp = CI.getType();
2033 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2034 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2035 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2036}
2037
2038// Replace intrinsic with unmasked version and a select.
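// As a sketch, avx512.mask.max.ps.128 maps to the unmasked sse.max.ps
// intrinsic on the first two operands, followed by emitX86Select on the
// trailing mask and passthru operands.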
2039 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2040 CallBase &CI, Value *&Rep) {
2041 Name = Name.substr(12); // Remove avx512.mask.
2042
2043 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2044 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2045 Intrinsic::ID IID;
2046 if (Name.starts_with("max.p")) {
2047 if (VecWidth == 128 && EltWidth == 32)
2048 IID = Intrinsic::x86_sse_max_ps;
2049 else if (VecWidth == 128 && EltWidth == 64)
2050 IID = Intrinsic::x86_sse2_max_pd;
2051 else if (VecWidth == 256 && EltWidth == 32)
2052 IID = Intrinsic::x86_avx_max_ps_256;
2053 else if (VecWidth == 256 && EltWidth == 64)
2054 IID = Intrinsic::x86_avx_max_pd_256;
2055 else
2056 llvm_unreachable("Unexpected intrinsic");
2057 } else if (Name.starts_with("min.p")) {
2058 if (VecWidth == 128 && EltWidth == 32)
2059 IID = Intrinsic::x86_sse_min_ps;
2060 else if (VecWidth == 128 && EltWidth == 64)
2061 IID = Intrinsic::x86_sse2_min_pd;
2062 else if (VecWidth == 256 && EltWidth == 32)
2063 IID = Intrinsic::x86_avx_min_ps_256;
2064 else if (VecWidth == 256 && EltWidth == 64)
2065 IID = Intrinsic::x86_avx_min_pd_256;
2066 else
2067 llvm_unreachable("Unexpected intrinsic");
2068 } else if (Name.starts_with("pshuf.b.")) {
2069 if (VecWidth == 128)
2070 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2071 else if (VecWidth == 256)
2072 IID = Intrinsic::x86_avx2_pshuf_b;
2073 else if (VecWidth == 512)
2074 IID = Intrinsic::x86_avx512_pshuf_b_512;
2075 else
2076 llvm_unreachable("Unexpected intrinsic");
2077 } else if (Name.starts_with("pmul.hr.sw.")) {
2078 if (VecWidth == 128)
2079 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2080 else if (VecWidth == 256)
2081 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2082 else if (VecWidth == 512)
2083 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2084 else
2085 llvm_unreachable("Unexpected intrinsic");
2086 } else if (Name.starts_with("pmulh.w.")) {
2087 if (VecWidth == 128)
2088 IID = Intrinsic::x86_sse2_pmulh_w;
2089 else if (VecWidth == 256)
2090 IID = Intrinsic::x86_avx2_pmulh_w;
2091 else if (VecWidth == 512)
2092 IID = Intrinsic::x86_avx512_pmulh_w_512;
2093 else
2094 llvm_unreachable("Unexpected intrinsic");
2095 } else if (Name.starts_with("pmulhu.w.")) {
2096 if (VecWidth == 128)
2097 IID = Intrinsic::x86_sse2_pmulhu_w;
2098 else if (VecWidth == 256)
2099 IID = Intrinsic::x86_avx2_pmulhu_w;
2100 else if (VecWidth == 512)
2101 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2102 else
2103 llvm_unreachable("Unexpected intrinsic");
2104 } else if (Name.starts_with("pmaddw.d.")) {
2105 if (VecWidth == 128)
2106 IID = Intrinsic::x86_sse2_pmadd_wd;
2107 else if (VecWidth == 256)
2108 IID = Intrinsic::x86_avx2_pmadd_wd;
2109 else if (VecWidth == 512)
2110 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2111 else
2112 llvm_unreachable("Unexpected intrinsic");
2113 } else if (Name.starts_with("pmaddubs.w.")) {
2114 if (VecWidth == 128)
2115 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2116 else if (VecWidth == 256)
2117 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2118 else if (VecWidth == 512)
2119 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2120 else
2121 llvm_unreachable("Unexpected intrinsic");
2122 } else if (Name.starts_with("packsswb.")) {
2123 if (VecWidth == 128)
2124 IID = Intrinsic::x86_sse2_packsswb_128;
2125 else if (VecWidth == 256)
2126 IID = Intrinsic::x86_avx2_packsswb;
2127 else if (VecWidth == 512)
2128 IID = Intrinsic::x86_avx512_packsswb_512;
2129 else
2130 llvm_unreachable("Unexpected intrinsic");
2131 } else if (Name.starts_with("packssdw.")) {
2132 if (VecWidth == 128)
2133 IID = Intrinsic::x86_sse2_packssdw_128;
2134 else if (VecWidth == 256)
2135 IID = Intrinsic::x86_avx2_packssdw;
2136 else if (VecWidth == 512)
2137 IID = Intrinsic::x86_avx512_packssdw_512;
2138 else
2139 llvm_unreachable("Unexpected intrinsic");
2140 } else if (Name.starts_with("packuswb.")) {
2141 if (VecWidth == 128)
2142 IID = Intrinsic::x86_sse2_packuswb_128;
2143 else if (VecWidth == 256)
2144 IID = Intrinsic::x86_avx2_packuswb;
2145 else if (VecWidth == 512)
2146 IID = Intrinsic::x86_avx512_packuswb_512;
2147 else
2148 llvm_unreachable("Unexpected intrinsic");
2149 } else if (Name.starts_with("packusdw.")) {
2150 if (VecWidth == 128)
2151 IID = Intrinsic::x86_sse41_packusdw;
2152 else if (VecWidth == 256)
2153 IID = Intrinsic::x86_avx2_packusdw;
2154 else if (VecWidth == 512)
2155 IID = Intrinsic::x86_avx512_packusdw_512;
2156 else
2157 llvm_unreachable("Unexpected intrinsic");
2158 } else if (Name.starts_with("vpermilvar.")) {
2159 if (VecWidth == 128 && EltWidth == 32)
2160 IID = Intrinsic::x86_avx_vpermilvar_ps;
2161 else if (VecWidth == 128 && EltWidth == 64)
2162 IID = Intrinsic::x86_avx_vpermilvar_pd;
2163 else if (VecWidth == 256 && EltWidth == 32)
2164 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2165 else if (VecWidth == 256 && EltWidth == 64)
2166 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2167 else if (VecWidth == 512 && EltWidth == 32)
2168 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2169 else if (VecWidth == 512 && EltWidth == 64)
2170 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2171 else
2172 llvm_unreachable("Unexpected intrinsic");
2173 } else if (Name == "cvtpd2dq.256") {
2174 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2175 } else if (Name == "cvtpd2ps.256") {
2176 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2177 } else if (Name == "cvttpd2dq.256") {
2178 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2179 } else if (Name == "cvttps2dq.128") {
2180 IID = Intrinsic::x86_sse2_cvttps2dq;
2181 } else if (Name == "cvttps2dq.256") {
2182 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2183 } else if (Name.starts_with("permvar.")) {
2184 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2185 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2186 IID = Intrinsic::x86_avx2_permps;
2187 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2188 IID = Intrinsic::x86_avx2_permd;
2189 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2190 IID = Intrinsic::x86_avx512_permvar_df_256;
2191 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2192 IID = Intrinsic::x86_avx512_permvar_di_256;
2193 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2194 IID = Intrinsic::x86_avx512_permvar_sf_512;
2195 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2196 IID = Intrinsic::x86_avx512_permvar_si_512;
2197 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2198 IID = Intrinsic::x86_avx512_permvar_df_512;
2199 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2200 IID = Intrinsic::x86_avx512_permvar_di_512;
2201 else if (VecWidth == 128 && EltWidth == 16)
2202 IID = Intrinsic::x86_avx512_permvar_hi_128;
2203 else if (VecWidth == 256 && EltWidth == 16)
2204 IID = Intrinsic::x86_avx512_permvar_hi_256;
2205 else if (VecWidth == 512 && EltWidth == 16)
2206 IID = Intrinsic::x86_avx512_permvar_hi_512;
2207 else if (VecWidth == 128 && EltWidth == 8)
2208 IID = Intrinsic::x86_avx512_permvar_qi_128;
2209 else if (VecWidth == 256 && EltWidth == 8)
2210 IID = Intrinsic::x86_avx512_permvar_qi_256;
2211 else if (VecWidth == 512 && EltWidth == 8)
2212 IID = Intrinsic::x86_avx512_permvar_qi_512;
2213 else
2214 llvm_unreachable("Unexpected intrinsic");
2215 } else if (Name.starts_with("dbpsadbw.")) {
2216 if (VecWidth == 128)
2217 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2218 else if (VecWidth == 256)
2219 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2220 else if (VecWidth == 512)
2221 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2222 else
2223 llvm_unreachable("Unexpected intrinsic");
2224 } else if (Name.starts_with("pmultishift.qb.")) {
2225 if (VecWidth == 128)
2226 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2227 else if (VecWidth == 256)
2228 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2229 else if (VecWidth == 512)
2230 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2231 else
2232 llvm_unreachable("Unexpected intrinsic");
2233 } else if (Name.starts_with("conflict.")) {
2234 if (Name[9] == 'd' && VecWidth == 128)
2235 IID = Intrinsic::x86_avx512_conflict_d_128;
2236 else if (Name[9] == 'd' && VecWidth == 256)
2237 IID = Intrinsic::x86_avx512_conflict_d_256;
2238 else if (Name[9] == 'd' && VecWidth == 512)
2239 IID = Intrinsic::x86_avx512_conflict_d_512;
2240 else if (Name[9] == 'q' && VecWidth == 128)
2241 IID = Intrinsic::x86_avx512_conflict_q_128;
2242 else if (Name[9] == 'q' && VecWidth == 256)
2243 IID = Intrinsic::x86_avx512_conflict_q_256;
2244 else if (Name[9] == 'q' && VecWidth == 512)
2245 IID = Intrinsic::x86_avx512_conflict_q_512;
2246 else
2247 llvm_unreachable("Unexpected intrinsic");
2248 } else if (Name.starts_with("pavg.")) {
2249 if (Name[5] == 'b' && VecWidth == 128)
2250 IID = Intrinsic::x86_sse2_pavg_b;
2251 else if (Name[5] == 'b' && VecWidth == 256)
2252 IID = Intrinsic::x86_avx2_pavg_b;
2253 else if (Name[5] == 'b' && VecWidth == 512)
2254 IID = Intrinsic::x86_avx512_pavg_b_512;
2255 else if (Name[5] == 'w' && VecWidth == 128)
2256 IID = Intrinsic::x86_sse2_pavg_w;
2257 else if (Name[5] == 'w' && VecWidth == 256)
2258 IID = Intrinsic::x86_avx2_pavg_w;
2259 else if (Name[5] == 'w' && VecWidth == 512)
2260 IID = Intrinsic::x86_avx512_pavg_w_512;
2261 else
2262 llvm_unreachable("Unexpected intrinsic");
2263 } else
2264 return false;
2265
2266 SmallVector<Value *, 4> Args(CI.args());
2267 Args.pop_back();
2268 Args.pop_back();
2269 Rep = Builder.CreateIntrinsic(IID, {}, Args);
2270 unsigned NumArgs = CI.arg_size();
2271 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2272 CI.getArgOperand(NumArgs - 2));
2273 return true;
2274}
2275
2276 /// Upgrade the comment in a call to inline asm that represents an objc
2277 /// retain/release marker.
2278void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2279 size_t Pos;
2280 if (AsmStr->find("mov\tfp") == 0 &&
2281 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2282 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2283 AsmStr->replace(Pos, 1, ";");
2284 }
2285}
2286
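// As a sketch of the rewrites below (placeholder value names),
// llvm.nvvm.clz.ll(i64 %x) becomes
//   %ctlz = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
//   %r = trunc i64 %ctlz to i32
// and llvm.nvvm.rotate.b32(i32 %x, i32 %n) becomes
//   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)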
2287 static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2288 Function *F, IRBuilder<> &Builder) {
2289 Value *Rep = nullptr;
2290
2291 if (Name == "abs.i" || Name == "abs.ll") {
2292 Value *Arg = CI->getArgOperand(0);
2293 Value *Neg = Builder.CreateNeg(Arg, "neg");
2294 Value *Cmp = Builder.CreateICmpSGE(
2295 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2296 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2297 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2298 Name.starts_with("atomic.load.add.f64.p")) {
2299 Value *Ptr = CI->getArgOperand(0);
2300 Value *Val = CI->getArgOperand(1);
2301 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2302 AtomicOrdering::SequentiallyConsistent);
2303 } else if (Name.consume_front("max.") &&
2304 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2305 Name == "ui" || Name == "ull")) {
2306 Value *Arg0 = CI->getArgOperand(0);
2307 Value *Arg1 = CI->getArgOperand(1);
2308 Value *Cmp = Name.starts_with("u")
2309 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
2310 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
2311 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
2312 } else if (Name.consume_front("min.") &&
2313 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
2314 Name == "ui" || Name == "ull")) {
2315 Value *Arg0 = CI->getArgOperand(0);
2316 Value *Arg1 = CI->getArgOperand(1);
2317 Value *Cmp = Name.starts_with("u")
2318 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
2319 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
2320 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
2321 } else if (Name == "clz.ll") {
2322 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2323 Value *Arg = CI->getArgOperand(0);
2324 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2325 {Arg, Builder.getFalse()},
2326 /*FMFSource=*/nullptr, "ctlz");
2327 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2328 } else if (Name == "popc.ll") {
2329 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2330 // i64.
2331 Value *Arg = CI->getArgOperand(0);
2332 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2333 Arg, /*FMFSource=*/nullptr, "ctpop");
2334 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2335 } else if (Name == "h2f") {
2336 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2337 {Builder.getFloatTy()}, CI->getArgOperand(0),
2338 /*FMFSource=*/nullptr, "h2f");
2339 } else if (Name.consume_front("bitcast.") &&
2340 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2341 Name == "d2ll")) {
2342 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2343 } else if (Name == "rotate.b32") {
2344 Value *Arg = CI->getOperand(0);
2345 Value *ShiftAmt = CI->getOperand(1);
2346 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2347 {Arg, Arg, ShiftAmt});
2348 } else if (Name == "rotate.b64") {
2349 Type *Int64Ty = Builder.getInt64Ty();
2350 Value *Arg = CI->getOperand(0);
2351 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2352 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2353 {Arg, Arg, ZExtShiftAmt});
2354 } else if (Name == "rotate.right.b64") {
2355 Type *Int64Ty = Builder.getInt64Ty();
2356 Value *Arg = CI->getOperand(0);
2357 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2358 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2359 {Arg, Arg, ZExtShiftAmt});
2360 } else if ((Name.consume_front("ptr.gen.to.") &&
2361 (Name.starts_with("local") || Name.starts_with("shared") ||
2362 Name.starts_with("global") || Name.starts_with("constant"))) ||
2363 (Name.consume_front("ptr.") &&
2364 (Name.consume_front("local") || Name.consume_front("shared") ||
2365 Name.consume_front("global") ||
2366 Name.consume_front("constant")) &&
2367 Name.starts_with(".to.gen"))) {
2368 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2369 } else if (Name.consume_front("ldg.global")) {
2370 Value *Ptr = CI->getArgOperand(0);
2371 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2372 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2373 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2374 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2375 MDNode *MD = MDNode::get(Builder.getContext(), {});
2376 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2377 return LD;
2378 } else {
2379 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2380 if (IID != Intrinsic::not_intrinsic &&
2381 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2382 rename(F);
2383 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2384 SmallVector<Value *, 2> Args;
2385 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2386 Value *Arg = CI->getArgOperand(I);
2387 Type *OldType = Arg->getType();
2388 Type *NewType = NewFn->getArg(I)->getType();
2389 Args.push_back(
2390 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2391 ? Builder.CreateBitCast(Arg, NewType)
2392 : Arg);
2393 }
2394 Rep = Builder.CreateCall(NewFn, Args);
2395 if (F->getReturnType()->isIntegerTy())
2396 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2397 }
2398 }
2399
2400 return Rep;
2401}
2402
2403 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2404 IRBuilder<> &Builder) {
2405 LLVMContext &C = F->getContext();
2406 Value *Rep = nullptr;
2407
2408 if (Name.starts_with("sse4a.movnt.")) {
2409 SmallVector<Metadata *, 1> Elts;
2410 Elts.push_back(
2411 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2412 MDNode *Node = MDNode::get(C, Elts);
2413
2414 Value *Arg0 = CI->getArgOperand(0);
2415 Value *Arg1 = CI->getArgOperand(1);
2416
2417 // Nontemporal (unaligned) store of the 0'th element of the float/double
2418 // vector.
2419 Value *Extract =
2420 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2421
2422 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2423 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2424 } else if (Name.starts_with("avx.movnt.") ||
2425 Name.starts_with("avx512.storent.")) {
2426 SmallVector<Metadata *, 1> Elts;
2427 Elts.push_back(
2428 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2429 MDNode *Node = MDNode::get(C, Elts);
2430
2431 Value *Arg0 = CI->getArgOperand(0);
2432 Value *Arg1 = CI->getArgOperand(1);
2433
2434 StoreInst *SI = Builder.CreateAlignedStore(
2435 Arg1, Arg0,
2436 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2437 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2438 } else if (Name == "sse2.storel.dq") {
2439 Value *Arg0 = CI->getArgOperand(0);
2440 Value *Arg1 = CI->getArgOperand(1);
2441
2442 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2443 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2444 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2445 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2446 } else if (Name.starts_with("sse.storeu.") ||
2447 Name.starts_with("sse2.storeu.") ||
2448 Name.starts_with("avx.storeu.")) {
2449 Value *Arg0 = CI->getArgOperand(0);
2450 Value *Arg1 = CI->getArgOperand(1);
2451 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2452 } else if (Name == "avx512.mask.store.ss") {
2453 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2454 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2455 Mask, false);
2456 } else if (Name.starts_with("avx512.mask.store")) {
2457 // "avx512.mask.storeu." or "avx512.mask.store."
2458 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2459 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2460 CI->getArgOperand(2), Aligned);
2461 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2462 // Upgrade packed integer vector compare intrinsics to compare instructions.
2463 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2464 bool CmpEq = Name[9] == 'e';
2465 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2466 CI->getArgOperand(0), CI->getArgOperand(1));
2467 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2468 } else if (Name.starts_with("avx512.broadcastm")) {
2469 Type *ExtTy = Type::getInt32Ty(C);
2470 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2471 ExtTy = Type::getInt64Ty(C);
2472 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2473 ExtTy->getPrimitiveSizeInBits();
2474 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2475 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2476 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2477 Value *Vec = CI->getArgOperand(0);
2478 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2479 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2480 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2481 } else if (Name.starts_with("avx.sqrt.p") ||
2482 Name.starts_with("sse2.sqrt.p") ||
2483 Name.starts_with("sse.sqrt.p")) {
2484 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2485 {CI->getArgOperand(0)});
2486 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2487 if (CI->arg_size() == 4 &&
2488 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2489 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2490 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2491 : Intrinsic::x86_avx512_sqrt_pd_512;
2492
2493 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2494 Rep = Builder.CreateIntrinsic(IID, {}, Args);
2495 } else {
2496 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2497 {CI->getArgOperand(0)});
2498 }
2499 Rep =
2500 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2501 } else if (Name.starts_with("avx512.ptestm") ||
2502 Name.starts_with("avx512.ptestnm")) {
2503 Value *Op0 = CI->getArgOperand(0);
2504 Value *Op1 = CI->getArgOperand(1);
2505 Value *Mask = CI->getArgOperand(2);
2506 Rep = Builder.CreateAnd(Op0, Op1);
2507 llvm::Type *Ty = Op0->getType();
2508 Value *Zero = llvm::Constant::getNullValue(Ty);
2509 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2510 ? ICmpInst::ICMP_NE
2511 : ICmpInst::ICMP_EQ;
2512 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2513 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2514 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2515 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2516 ->getNumElements();
2517 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2518 Rep =
2519 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2520 } else if (Name.starts_with("avx512.kunpck")) {
2521 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2522 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2523 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2524 int Indices[64];
2525 for (unsigned i = 0; i != NumElts; ++i)
2526 Indices[i] = i;
2527
2528 // First extract half of each vector. This gives better codegen than
2529 // doing it in a single shuffle.
2530 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2531 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2532 // Concat the vectors.
2533 // NOTE: Operands have to be swapped to match intrinsic definition.
2534 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2535 Rep = Builder.CreateBitCast(Rep, CI->getType());
2536 } else if (Name == "avx512.kand.w") {
2537 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2538 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2539 Rep = Builder.CreateAnd(LHS, RHS);
2540 Rep = Builder.CreateBitCast(Rep, CI->getType());
2541 } else if (Name == "avx512.kandn.w") {
2542 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2543 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2544 LHS = Builder.CreateNot(LHS);
2545 Rep = Builder.CreateAnd(LHS, RHS);
2546 Rep = Builder.CreateBitCast(Rep, CI->getType());
2547 } else if (Name == "avx512.kor.w") {
2548 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2549 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2550 Rep = Builder.CreateOr(LHS, RHS);
2551 Rep = Builder.CreateBitCast(Rep, CI->getType());
2552 } else if (Name == "avx512.kxor.w") {
2553 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2554 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2555 Rep = Builder.CreateXor(LHS, RHS);
2556 Rep = Builder.CreateBitCast(Rep, CI->getType());
2557 } else if (Name == "avx512.kxnor.w") {
2558 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2559 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2560 LHS = Builder.CreateNot(LHS);
2561 Rep = Builder.CreateXor(LHS, RHS);
2562 Rep = Builder.CreateBitCast(Rep, CI->getType());
2563 } else if (Name == "avx512.knot.w") {
2564 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2565 Rep = Builder.CreateNot(Rep);
2566 Rep = Builder.CreateBitCast(Rep, CI->getType());
2567 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2568 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2569 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2570 Rep = Builder.CreateOr(LHS, RHS);
2571 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2572 Value *C;
2573 if (Name[14] == 'c')
2574 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2575 else
2576 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2577 Rep = Builder.CreateICmpEQ(Rep, C);
2578 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2579 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2580 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2581 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2582 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2583 Type *I32Ty = Type::getInt32Ty(C);
2584 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2585 ConstantInt::get(I32Ty, 0));
2586 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2587 ConstantInt::get(I32Ty, 0));
2588 Value *EltOp;
2589 if (Name.contains(".add."))
2590 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2591 else if (Name.contains(".sub."))
2592 EltOp = Builder.CreateFSub(Elt0, Elt1);
2593 else if (Name.contains(".mul."))
2594 EltOp = Builder.CreateFMul(Elt0, Elt1);
2595 else
2596 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2597 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2598 ConstantInt::get(I32Ty, 0));
2599 } else if (Name.starts_with("avx512.mask.pcmp")) {
2600 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2601 bool CmpEq = Name[16] == 'e';
2602 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2603 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2604 Type *OpTy = CI->getArgOperand(0)->getType();
2605 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2606 Intrinsic::ID IID;
2607 switch (VecWidth) {
2608 default:
2609 llvm_unreachable("Unexpected intrinsic");
2610 case 128:
2611 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2612 break;
2613 case 256:
2614 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2615 break;
2616 case 512:
2617 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2618 break;
2619 }
2620
2621 Rep = Builder.CreateIntrinsic(IID, {},
2622 {CI->getOperand(0), CI->getArgOperand(1)});
2623 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2624 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2625 Type *OpTy = CI->getArgOperand(0)->getType();
2626 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2627 unsigned EltWidth = OpTy->getScalarSizeInBits();
2628 Intrinsic::ID IID;
2629 if (VecWidth == 128 && EltWidth == 32)
2630 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2631 else if (VecWidth == 256 && EltWidth == 32)
2632 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2633 else if (VecWidth == 512 && EltWidth == 32)
2634 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2635 else if (VecWidth == 128 && EltWidth == 64)
2636 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2637 else if (VecWidth == 256 && EltWidth == 64)
2638 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2639 else if (VecWidth == 512 && EltWidth == 64)
2640 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2641 else
2642 llvm_unreachable("Unexpected intrinsic");
2643
2644 Rep = Builder.CreateIntrinsic(IID, {},
2645 {CI->getOperand(0), CI->getArgOperand(1)});
2646 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2647 } else if (Name.starts_with("avx512.cmp.p")) {
2648 SmallVector<Value *, 4> Args(CI->args());
2649 Type *OpTy = Args[0]->getType();
2650 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2651 unsigned EltWidth = OpTy->getScalarSizeInBits();
2652 Intrinsic::ID IID;
2653 if (VecWidth == 128 && EltWidth == 32)
2654 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2655 else if (VecWidth == 256 && EltWidth == 32)
2656 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2657 else if (VecWidth == 512 && EltWidth == 32)
2658 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2659 else if (VecWidth == 128 && EltWidth == 64)
2660 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2661 else if (VecWidth == 256 && EltWidth == 64)
2662 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2663 else if (VecWidth == 512 && EltWidth == 64)
2664 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2665 else
2666 llvm_unreachable("Unexpected intrinsic");
2667
2668 Value *Mask = Constant::getAllOnesValue(CI->getType());
2669 if (VecWidth == 512)
2670 std::swap(Mask, Args.back());
2671 Args.push_back(Mask);
2672
2673 Rep = Builder.CreateIntrinsic(IID, {}, Args);
2674 } else if (Name.starts_with("avx512.mask.cmp.")) {
2675 // Integer compare intrinsics.
2676 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2677 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2678 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2679 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2680 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2681 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2682 Name.starts_with("avx512.cvtw2mask.") ||
2683 Name.starts_with("avx512.cvtd2mask.") ||
2684 Name.starts_with("avx512.cvtq2mask.")) {
2685 Value *Op = CI->getArgOperand(0);
2686 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2687 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2688 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2689 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2690 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2691 Name.starts_with("avx512.mask.pabs")) {
2692 Rep = upgradeAbs(Builder, *CI);
2693 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2694 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2695 Name.starts_with("avx512.mask.pmaxs")) {
2696 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2697 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2698 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2699 Name.starts_with("avx512.mask.pmaxu")) {
2700 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2701 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2702 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2703 Name.starts_with("avx512.mask.pmins")) {
2704 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2705 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2706 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2707 Name.starts_with("avx512.mask.pminu")) {
2708 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2709 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2710 Name == "avx512.pmulu.dq.512" ||
2711 Name.starts_with("avx512.mask.pmulu.dq.")) {
2712 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2713 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2714 Name == "avx512.pmul.dq.512" ||
2715 Name.starts_with("avx512.mask.pmul.dq.")) {
2716 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2717 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2718 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2719 Rep =
2720 Builder.CreateSIToFP(CI->getArgOperand(1),
2721 cast<VectorType>(CI->getType())->getElementType());
2722 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2723 } else if (Name == "avx512.cvtusi2sd") {
2724 Rep =
2725 Builder.CreateUIToFP(CI->getArgOperand(1),
2726 cast<VectorType>(CI->getType())->getElementType());
2727 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2728 } else if (Name == "sse2.cvtss2sd") {
2729 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2730 Rep = Builder.CreateFPExt(
2731 Rep, cast<VectorType>(CI->getType())->getElementType());
2732 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2733 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2734 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2735 Name.starts_with("avx512.mask.cvtdq2pd.") ||
2736 Name.starts_with("avx512.mask.cvtudq2pd.") ||
2737 Name.starts_with("avx512.mask.cvtdq2ps.") ||
2738 Name.starts_with("avx512.mask.cvtudq2ps.") ||
2739 Name.starts_with("avx512.mask.cvtqq2pd.") ||
2740 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2741 Name == "avx512.mask.cvtqq2ps.256" ||
2742 Name == "avx512.mask.cvtqq2ps.512" ||
2743 Name == "avx512.mask.cvtuqq2ps.256" ||
2744 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2745 Name == "avx.cvt.ps2.pd.256" ||
2746 Name == "avx512.mask.cvtps2pd.128" ||
2747 Name == "avx512.mask.cvtps2pd.256") {
2748 auto *DstTy = cast<FixedVectorType>(CI->getType());
2749 Rep = CI->getArgOperand(0);
2750 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2751
2752 unsigned NumDstElts = DstTy->getNumElements();
2753 if (NumDstElts < SrcTy->getNumElements()) {
2754 assert(NumDstElts == 2 && "Unexpected vector size");
2755 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2756 }
2757
2758 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2759 bool IsUnsigned = Name.contains("cvtu");
2760 if (IsPS2PD)
2761 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2762 else if (CI->arg_size() == 4 &&
2763 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2764 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2765 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2766 : Intrinsic::x86_avx512_sitofp_round;
2767 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
2768 {Rep, CI->getArgOperand(3)});
2769 } else {
2770 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2771 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2772 }
2773
2774 if (CI->arg_size() >= 3)
2775 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2776 CI->getArgOperand(1));
2777 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2778 Name.starts_with("vcvtph2ps.")) {
2779 auto *DstTy = cast<FixedVectorType>(CI->getType());
2780 Rep = CI->getArgOperand(0);
2781 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2782 unsigned NumDstElts = DstTy->getNumElements();
2783 if (NumDstElts != SrcTy->getNumElements()) {
2784 assert(NumDstElts == 4 && "Unexpected vector size");
2785 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2786 }
2787 Rep = Builder.CreateBitCast(
2788 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2789 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2790 if (CI->arg_size() >= 3)
2791 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2792 CI->getArgOperand(1));
2793 } else if (Name.starts_with("avx512.mask.load")) {
2794 // "avx512.mask.loadu." or "avx512.mask.load."
2795 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2796 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2797 CI->getArgOperand(2), Aligned);
2798 } else if (Name.starts_with("avx512.mask.expand.load.")) {
2799 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2800 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2801 ResultTy->getNumElements());
2802
2803 Rep = Builder.CreateIntrinsic(
2804 Intrinsic::masked_expandload, ResultTy,
2805 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
2806 } else if (Name.starts_with("avx512.mask.compress.store.")) {
2807 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2808 Value *MaskVec =
2809 getX86MaskVec(Builder, CI->getArgOperand(2),
2810 cast<FixedVectorType>(ResultTy)->getNumElements());
2811
2812 Rep = Builder.CreateIntrinsic(
2813 Intrinsic::masked_compressstore, ResultTy,
2814 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
2815 } else if (Name.starts_with("avx512.mask.compress.") ||
2816 Name.starts_with("avx512.mask.expand.")) {
2817 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2818
2819 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2820 ResultTy->getNumElements());
2821
2822 bool IsCompress = Name[12] == 'c';
2823 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2824 : Intrinsic::x86_avx512_mask_expand;
2825 Rep = Builder.CreateIntrinsic(
2826 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
2827 } else if (Name.starts_with("xop.vpcom")) {
2828 bool IsSigned;
2829 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2830 Name.ends_with("uq"))
2831 IsSigned = false;
2832 else if (Name.ends_with("b") || Name.ends_with("w") ||
2833 Name.ends_with("d") || Name.ends_with("q"))
2834 IsSigned = true;
2835 else
2836 llvm_unreachable("Unknown suffix");
2837
2838 unsigned Imm;
2839 if (CI->arg_size() == 3) {
2840 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2841 } else {
2842 Name = Name.substr(9); // strip off "xop.vpcom"
2843 if (Name.starts_with("lt"))
2844 Imm = 0;
2845 else if (Name.starts_with("le"))
2846 Imm = 1;
2847 else if (Name.starts_with("gt"))
2848 Imm = 2;
2849 else if (Name.starts_with("ge"))
2850 Imm = 3;
2851 else if (Name.starts_with("eq"))
2852 Imm = 4;
2853 else if (Name.starts_with("ne"))
2854 Imm = 5;
2855 else if (Name.starts_with("false"))
2856 Imm = 6;
2857 else if (Name.starts_with("true"))
2858 Imm = 7;
2859 else
2860 llvm_unreachable("Unknown condition");
2861 }
2862
2863 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2864 } else if (Name.starts_with("xop.vpcmov")) {
2865 Value *Sel = CI->getArgOperand(2);
2866 Value *NotSel = Builder.CreateNot(Sel);
2867 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2868 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2869 Rep = Builder.CreateOr(Sel0, Sel1);
2870 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
2871 Name.starts_with("avx512.mask.prol")) {
2872 Rep = upgradeX86Rotate(Builder, *CI, false);
2873 } else if (Name.starts_with("avx512.pror") ||
2874 Name.starts_with("avx512.mask.pror")) {
2875 Rep = upgradeX86Rotate(Builder, *CI, true);
2876 } else if (Name.starts_with("avx512.vpshld.") ||
2877 Name.starts_with("avx512.mask.vpshld") ||
2878 Name.starts_with("avx512.maskz.vpshld")) {
2879 bool ZeroMask = Name[11] == 'z';
2880 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2881 } else if (Name.starts_with("avx512.vpshrd.") ||
2882 Name.starts_with("avx512.mask.vpshrd") ||
2883 Name.starts_with("avx512.maskz.vpshrd")) {
2884 bool ZeroMask = Name[11] == 'z';
2885 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2886 } else if (Name == "sse42.crc32.64.8") {
2887 Value *Trunc0 =
2888 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2889 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8, {},
2890 {Trunc0, CI->getArgOperand(1)});
2891 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2892 } else if (Name.starts_with("avx.vbroadcast.s") ||
2893 Name.starts_with("avx512.vbroadcast.s")) {
2894 // Replace broadcasts with a series of insertelements.
2895 auto *VecTy = cast<FixedVectorType>(CI->getType());
2896 Type *EltTy = VecTy->getElementType();
2897 unsigned EltNum = VecTy->getNumElements();
2898 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2899 Type *I32Ty = Type::getInt32Ty(C);
2900 Rep = PoisonValue::get(VecTy);
2901 for (unsigned I = 0; I < EltNum; ++I)
2902 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
2903 } else if (Name.starts_with("sse41.pmovsx") ||
2904 Name.starts_with("sse41.pmovzx") ||
2905 Name.starts_with("avx2.pmovsx") ||
2906 Name.starts_with("avx2.pmovzx") ||
2907 Name.starts_with("avx512.mask.pmovsx") ||
2908 Name.starts_with("avx512.mask.pmovzx")) {
2909 auto *DstTy = cast<FixedVectorType>(CI->getType());
2910 unsigned NumDstElts = DstTy->getNumElements();
2911
2912 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2913 SmallVector<int, 8> ShuffleMask(NumDstElts);
2914 for (unsigned i = 0; i != NumDstElts; ++i)
2915 ShuffleMask[i] = i;
2916
2917 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2918
2919 bool DoSext = Name.contains("pmovsx");
2920 Rep =
2921 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
2922 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2923 if (CI->arg_size() == 3)
2924 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2925 CI->getArgOperand(1));
2926 } else if (Name == "avx512.mask.pmov.qd.256" ||
2927 Name == "avx512.mask.pmov.qd.512" ||
2928 Name == "avx512.mask.pmov.wb.256" ||
2929 Name == "avx512.mask.pmov.wb.512") {
2930 Type *Ty = CI->getArgOperand(1)->getType();
2931 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2932 Rep =
2933 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2934 } else if (Name.starts_with("avx.vbroadcastf128") ||
2935 Name == "avx2.vbroadcasti128") {
2936 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2937 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2938 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2939 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2940 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
2941 if (NumSrcElts == 2)
2942 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2943 else
2944 Rep = Builder.CreateShuffleVector(Load,
2945 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2946 } else if (Name.starts_with("avx512.mask.shuf.i") ||
2947 Name.starts_with("avx512.mask.shuf.f")) {
2948 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2949 Type *VT = CI->getType();
2950 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2951 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2952 unsigned ControlBitsMask = NumLanes - 1;
2953 unsigned NumControlBits = NumLanes / 2;
2954 SmallVector<int, 8> ShuffleMask(0);
2955
2956 for (unsigned l = 0; l != NumLanes; ++l) {
2957 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2958 // We actually need the other source.
2959 if (l >= NumLanes / 2)
2960 LaneMask += NumLanes;
2961 for (unsigned i = 0; i != NumElementsInLane; ++i)
2962 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2963 }
2964 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2965 CI->getArgOperand(1), ShuffleMask);
2966 Rep =
2967 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
2968 } else if (Name.starts_with("avx512.mask.broadcastf") ||
2969 Name.starts_with("avx512.mask.broadcasti")) {
2970 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2971 ->getNumElements();
2972 unsigned NumDstElts =
2973 cast<FixedVectorType>(CI->getType())->getNumElements();
2974
2975 SmallVector<int, 8> ShuffleMask(NumDstElts);
2976 for (unsigned i = 0; i != NumDstElts; ++i)
2977 ShuffleMask[i] = i % NumSrcElts;
2978
2979 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2980 CI->getArgOperand(0), ShuffleMask);
2981 Rep =
2982 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2983 } else if (Name.starts_with("avx2.pbroadcast") ||
2984 Name.starts_with("avx2.vbroadcast") ||
2985 Name.starts_with("avx512.pbroadcast") ||
2986 Name.starts_with("avx512.mask.broadcast.s")) {
2987 // Replace vp?broadcasts with a vector shuffle.
2988 Value *Op = CI->getArgOperand(0);
2989 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2990 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2991 SmallVector<int, 8> M;
2992 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
2993 Rep = Builder.CreateShuffleVector(Op, M);
2994
2995 if (CI->arg_size() == 3)
2996 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2997 CI->getArgOperand(1));
2998 } else if (Name.starts_with("sse2.padds.") ||
2999 Name.starts_with("avx2.padds.") ||
3000 Name.starts_with("avx512.padds.") ||
3001 Name.starts_with("avx512.mask.padds.")) {
3002 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3003 } else if (Name.starts_with("sse2.psubs.") ||
3004 Name.starts_with("avx2.psubs.") ||
3005 Name.starts_with("avx512.psubs.") ||
3006 Name.starts_with("avx512.mask.psubs.")) {
3007 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3008 } else if (Name.starts_with("sse2.paddus.") ||
3009 Name.starts_with("avx2.paddus.") ||
3010 Name.starts_with("avx512.mask.paddus.")) {
3011 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3012 } else if (Name.starts_with("sse2.psubus.") ||
3013 Name.starts_with("avx2.psubus.") ||
3014 Name.starts_with("avx512.mask.psubus.")) {
3015 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3016 } else if (Name.starts_with("avx512.mask.palignr.")) {
3017 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3018 CI->getArgOperand(1), CI->getArgOperand(2),
3019 CI->getArgOperand(3), CI->getArgOperand(4),
3020 false);
3021 } else if (Name.starts_with("avx512.mask.valign.")) {
3022 Rep = upgradeX86ALIGNIntrinsics(
3023 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3024 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3025 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3026 // 128/256-bit shift left specified in bits.
3027 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3028 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3029 Shift / 8); // Shift is in bits.
3030 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3031 // 128/256-bit shift right specified in bits.
3032 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3033 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3034 Shift / 8); // Shift is in bits.
3035 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3036 Name == "avx512.psll.dq.512") {
3037 // 128/256/512-bit shift left specified in bytes.
3038 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3039 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3040 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3041 Name == "avx512.psrl.dq.512") {
3042 // 128/256/512-bit shift right specified in bytes.
3043 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3044 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3045 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3046 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3047 Name.starts_with("avx2.pblendd.")) {
3048 Value *Op0 = CI->getArgOperand(0);
3049 Value *Op1 = CI->getArgOperand(1);
3050 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3051 auto *VecTy = cast<FixedVectorType>(CI->getType());
3052 unsigned NumElts = VecTy->getNumElements();
3053
3054 SmallVector<int, 16> Idxs(NumElts);
3055 for (unsigned i = 0; i != NumElts; ++i)
3056 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3057
3058 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
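// Editor's note (illustrative): with Imm = 0x0F on <8 x i16> operands, the
// mask built above is <8, 9, 10, 11, 4, 5, 6, 7>: the low four lanes come
// from Op1 and the remaining lanes from Op0.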
3059 } else if (Name.starts_with("avx.vinsertf128.") ||
3060 Name == "avx2.vinserti128" ||
3061 Name.starts_with("avx512.mask.insert")) {
3062 Value *Op0 = CI->getArgOperand(0);
3063 Value *Op1 = CI->getArgOperand(1);
3064 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3065 unsigned DstNumElts =
3066 cast<FixedVectorType>(CI->getType())->getNumElements();
3067 unsigned SrcNumElts =
3068 cast<FixedVectorType>(Op1->getType())->getNumElements();
3069 unsigned Scale = DstNumElts / SrcNumElts;
3070
3071 // Mask off the high bits of the immediate value; hardware ignores those.
3072 Imm = Imm % Scale;
3073
3074 // Extend the second operand into a vector the size of the destination.
3075 SmallVector<int, 8> Idxs(DstNumElts);
3076 for (unsigned i = 0; i != SrcNumElts; ++i)
3077 Idxs[i] = i;
3078 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3079 Idxs[i] = SrcNumElts;
3080 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3081
3082 // Insert the second operand into the first operand.
3083
3084 // Note that there is no guarantee that instruction lowering will actually
3085 // produce a vinsertf128 instruction for the created shuffles. In
3086 // particular, the 0 immediate case involves no lane changes, so it can
3087 // be handled as a blend.
3088
3089 // Example of shuffle mask for 32-bit elements:
3090 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3091 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
3092
3093 // First fill with identity mask.
3094 for (unsigned i = 0; i != DstNumElts; ++i)
3095 Idxs[i] = i;
3096 // Then replace the elements where we need to insert.
3097 for (unsigned i = 0; i != SrcNumElts; ++i)
3098 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3099 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3100
3101 // If the intrinsic has a mask operand, handle that.
3102 if (CI->arg_size() == 5)
3103 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3104 CI->getArgOperand(3));
3105 } else if (Name.starts_with("avx.vextractf128.") ||
3106 Name == "avx2.vextracti128" ||
3107 Name.starts_with("avx512.mask.vextract")) {
3108 Value *Op0 = CI->getArgOperand(0);
3109 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3110 unsigned DstNumElts =
3111 cast<FixedVectorType>(CI->getType())->getNumElements();
3112 unsigned SrcNumElts =
3113 cast<FixedVectorType>(Op0->getType())->getNumElements();
3114 unsigned Scale = SrcNumElts / DstNumElts;
3115
3116 // Mask off the high bits of the immediate value; hardware ignores those.
3117 Imm = Imm % Scale;
3118
3119 // Get indexes for the subvector of the input vector.
3120 SmallVector<int, 8> Idxs(DstNumElts);
3121 for (unsigned i = 0; i != DstNumElts; ++i) {
3122 Idxs[i] = i + (Imm * DstNumElts);
3123 }
3124 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3125
3126 // If the intrinsic has a mask operand, handle that.
3127 if (CI->arg_size() == 4)
3128 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3129 CI->getArgOperand(2));
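// Editor's note (illustrative): for avx.vextractf128.pd.256 with Imm = 1 on a
// <4 x double> source, Scale is 2 and the mask is <2, 3>, i.e. the upper
// 128-bit half of the input.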
3130 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3131 Name.starts_with("avx512.mask.perm.di.")) {
3132 Value *Op0 = CI->getArgOperand(0);
3133 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3134 auto *VecTy = cast<FixedVectorType>(CI->getType());
3135 unsigned NumElts = VecTy->getNumElements();
3136
3137 SmallVector<int, 8> Idxs(NumElts);
3138 for (unsigned i = 0; i != NumElts; ++i)
3139 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3140
3141 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3142
3143 if (CI->arg_size() == 4)
3144 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3145 CI->getArgOperand(2));
3146 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3147 // The immediate permute control byte looks like this:
3148 // [1:0] - select 128 bits from sources for low half of destination
3149 // [2] - ignore
3150 // [3] - zero low half of destination
3151 // [5:4] - select 128 bits from sources for high half of destination
3152 // [6] - ignore
3153 // [7] - zero high half of destination
3154
3155 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3156
3157 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3158 unsigned HalfSize = NumElts / 2;
3159 SmallVector<int, 8> ShuffleMask(NumElts);
3160
3161 // Determine which operand(s) are actually in use for this instruction.
3162 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3163 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3164
3165 // If needed, replace operands based on zero mask.
3166 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3167 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3168
3169 // Permute low half of result.
3170 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3171 for (unsigned i = 0; i < HalfSize; ++i)
3172 ShuffleMask[i] = StartIndex + i;
3173
3174 // Permute high half of result.
3175 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3176 for (unsigned i = 0; i < HalfSize; ++i)
3177 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3178
3179 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
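// Editor's note (illustrative): with Imm = 0x31 on <8 x float> sources, V0 is
// the first operand, V1 the second, no zeroing bits are set, and both
// StartIndex values are HalfSize, giving mask <4, 5, 6, 7, 12, 13, 14, 15>:
// the high half of each source.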
3180
3181 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3182 Name.starts_with("avx512.mask.vpermil.p") ||
3183 Name.starts_with("avx512.mask.pshuf.d.")) {
3184 Value *Op0 = CI->getArgOperand(0);
3185 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3186 auto *VecTy = cast<FixedVectorType>(CI->getType());
3187 unsigned NumElts = VecTy->getNumElements();
3188 // Calculate the size of each index in the immediate.
3189 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3190 unsigned IdxMask = ((1 << IdxSize) - 1);
3191
3192 SmallVector<int, 8> Idxs(NumElts);
3193 // Look up the bits for this element, wrapping around the immediate every
3194 // 8 bits. Elements are grouped into sets of 2 or 4, so we need to offset
3195 // by the first index of each group.
3196 for (unsigned i = 0; i != NumElts; ++i)
3197 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3198
3199 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3200
3201 if (CI->arg_size() == 4)
3202 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3203 CI->getArgOperand(2));
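// Editor's note (illustrative): sse2.pshuf.d with Imm = 0x1B on <4 x i32>
// yields the two-bit indices 3, 2, 1, 0, i.e. a full lane reversal.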
3204 } else if (Name == "sse2.pshufl.w" ||
3205 Name.starts_with("avx512.mask.pshufl.w.")) {
3206 Value *Op0 = CI->getArgOperand(0);
3207 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3208 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3209
3210 SmallVector<int, 16> Idxs(NumElts);
3211 for (unsigned l = 0; l != NumElts; l += 8) {
3212 for (unsigned i = 0; i != 4; ++i)
3213 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3214 for (unsigned i = 4; i != 8; ++i)
3215 Idxs[i + l] = i + l;
3216 }
3217
3218 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3219
3220 if (CI->arg_size() == 4)
3221 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3222 CI->getArgOperand(2));
3223 } else if (Name == "sse2.pshufh.w" ||
3224 Name.starts_with("avx512.mask.pshufh.w.")) {
3225 Value *Op0 = CI->getArgOperand(0);
3226 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3227 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3228
3229 SmallVector<int, 16> Idxs(NumElts);
3230 for (unsigned l = 0; l != NumElts; l += 8) {
3231 for (unsigned i = 0; i != 4; ++i)
3232 Idxs[i + l] = i + l;
3233 for (unsigned i = 0; i != 4; ++i)
3234 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3235 }
3236
3237 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3238
3239 if (CI->arg_size() == 4)
3240 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3241 CI->getArgOperand(2));
3242 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3243 Value *Op0 = CI->getArgOperand(0);
3244 Value *Op1 = CI->getArgOperand(1);
3245 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3246 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3247
3248 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3249 unsigned HalfLaneElts = NumLaneElts / 2;
3250
3251 SmallVector<int, 16> Idxs(NumElts);
3252 for (unsigned i = 0; i != NumElts; ++i) {
3253 // Base index is the starting element of the lane.
3254 Idxs[i] = i - (i % NumLaneElts);
3255 // If we are halfway through the lane, switch to the other source.
3256 if ((i % NumLaneElts) >= HalfLaneElts)
3257 Idxs[i] += NumElts;
3258 // Now select the specific element by adding HalfLaneElts bits from the
3259 // immediate, wrapping around the immediate every 8 bits.
3260 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3261 }
3262
3263 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3264
3265 Rep =
3266 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3267 } else if (Name.starts_with("avx512.mask.movddup") ||
3268 Name.starts_with("avx512.mask.movshdup") ||
3269 Name.starts_with("avx512.mask.movsldup")) {
3270 Value *Op0 = CI->getArgOperand(0);
3271 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3272 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3273
3274 unsigned Offset = 0;
3275 if (Name.starts_with("avx512.mask.movshdup."))
3276 Offset = 1;
3277
3278 SmallVector<int, 16> Idxs(NumElts);
3279 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3280 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3281 Idxs[i + l + 0] = i + l + Offset;
3282 Idxs[i + l + 1] = i + l + Offset;
3283 }
3284
3285 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3286
3287 Rep =
3288 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
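// Editor's note (illustrative): on <4 x float>, movsldup (Offset 0) produces
// mask <0, 0, 2, 2>, duplicating the even lanes, while movshdup (Offset 1)
// produces <1, 1, 3, 3>, duplicating the odd lanes.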
3289 } else if (Name.starts_with("avx512.mask.punpckl") ||
3290 Name.starts_with("avx512.mask.unpckl.")) {
3291 Value *Op0 = CI->getArgOperand(0);
3292 Value *Op1 = CI->getArgOperand(1);
3293 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3294 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3295
3296 SmallVector<int, 64> Idxs(NumElts);
3297 for (int l = 0; l != NumElts; l += NumLaneElts)
3298 for (int i = 0; i != NumLaneElts; ++i)
3299 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3300
3301 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3302
3303 Rep =
3304 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
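// Editor's note (illustrative): for a 128-bit <4 x i32> unpckl, the mask is
// <0, 4, 1, 5>, interleaving the low halves of the two sources within the
// lane.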
3305 } else if (Name.starts_with("avx512.mask.punpckh") ||
3306 Name.starts_with("avx512.mask.unpckh.")) {
3307 Value *Op0 = CI->getArgOperand(0);
3308 Value *Op1 = CI->getArgOperand(1);
3309 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3310 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3311
3312 SmallVector<int, 64> Idxs(NumElts);
3313 for (int l = 0; l != NumElts; l += NumLaneElts)
3314 for (int i = 0; i != NumLaneElts; ++i)
3315 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3316
3317 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3318
3319 Rep =
3320 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3321 } else if (Name.starts_with("avx512.mask.and.") ||
3322 Name.starts_with("avx512.mask.pand.")) {
3323 VectorType *FTy = cast<VectorType>(CI->getType());
3324 VectorType *ITy = VectorType::getInteger(FTy);
3325 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3326 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3327 Rep = Builder.CreateBitCast(Rep, FTy);
3328 Rep =
3329 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3330 } else if (Name.starts_with("avx512.mask.andn.") ||
3331 Name.starts_with("avx512.mask.pandn.")) {
3332 VectorType *FTy = cast<VectorType>(CI->getType());
3333 VectorType *ITy = VectorType::getInteger(FTy);
3334 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3335 Rep = Builder.CreateAnd(Rep,
3336 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3337 Rep = Builder.CreateBitCast(Rep, FTy);
3338 Rep =
3339 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3340 } else if (Name.starts_with("avx512.mask.or.") ||
3341 Name.starts_with("avx512.mask.por.")) {
3342 VectorType *FTy = cast<VectorType>(CI->getType());
3343 VectorType *ITy = VectorType::getInteger(FTy);
3344 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3345 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3346 Rep = Builder.CreateBitCast(Rep, FTy);
3347 Rep =
3348 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3349 } else if (Name.starts_with("avx512.mask.xor.") ||
3350 Name.starts_with("avx512.mask.pxor.")) {
3351 VectorType *FTy = cast<VectorType>(CI->getType());
3352 VectorType *ITy = VectorType::getInteger(FTy);
3353 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3354 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3355 Rep = Builder.CreateBitCast(Rep, FTy);
3356 Rep =
3357 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3358 } else if (Name.starts_with("avx512.mask.padd.")) {
3359 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3360 Rep =
3361 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3362 } else if (Name.starts_with("avx512.mask.psub.")) {
3363 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3364 Rep =
3365 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3366 } else if (Name.starts_with("avx512.mask.pmull.")) {
3367 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3368 Rep =
3369 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3370 } else if (Name.starts_with("avx512.mask.add.p")) {
3371 if (Name.ends_with(".512")) {
3372 Intrinsic::ID IID;
3373 if (Name[17] == 's')
3374 IID = Intrinsic::x86_avx512_add_ps_512;
3375 else
3376 IID = Intrinsic::x86_avx512_add_pd_512;
3377
3378 Rep = Builder.CreateIntrinsic(
3379 IID, {},
3380 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3381 } else {
3382 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3383 }
3384 Rep =
3385 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3386 } else if (Name.starts_with("avx512.mask.div.p")) {
3387 if (Name.ends_with(".512")) {
3388 Intrinsic::ID IID;
3389 if (Name[17] == 's')
3390 IID = Intrinsic::x86_avx512_div_ps_512;
3391 else
3392 IID = Intrinsic::x86_avx512_div_pd_512;
3393
3394 Rep = Builder.CreateIntrinsic(
3395 IID, {},
3396 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3397 } else {
3398 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3399 }
3400 Rep =
3401 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3402 } else if (Name.starts_with("avx512.mask.mul.p")) {
3403 if (Name.ends_with(".512")) {
3404 Intrinsic::ID IID;
3405 if (Name[17] == 's')
3406 IID = Intrinsic::x86_avx512_mul_ps_512;
3407 else
3408 IID = Intrinsic::x86_avx512_mul_pd_512;
3409
3410 Rep = Builder.CreateIntrinsic(
3411 IID, {},
3412 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3413 } else {
3414 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3415 }
3416 Rep =
3417 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3418 } else if (Name.starts_with("avx512.mask.sub.p")) {
3419 if (Name.ends_with(".512")) {
3420 Intrinsic::ID IID;
3421 if (Name[17] == 's')
3422 IID = Intrinsic::x86_avx512_sub_ps_512;
3423 else
3424 IID = Intrinsic::x86_avx512_sub_pd_512;
3425
3426 Rep = Builder.CreateIntrinsic(
3427 IID, {},
3428 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3429 } else {
3430 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3431 }
3432 Rep =
3433 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3434 } else if ((Name.starts_with("avx512.mask.max.p") ||
3435 Name.starts_with("avx512.mask.min.p")) &&
3436 Name.drop_front(18) == ".512") {
3437 bool IsDouble = Name[17] == 'd';
3438 bool IsMin = Name[13] == 'i';
3439 static const Intrinsic::ID MinMaxTbl[2][2] = {
3440 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3441 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3442 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3443
3444 Rep = Builder.CreateIntrinsic(
3445 IID, {},
3446 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3447 Rep =
3448 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3449 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3450 Rep =
3451 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3452 {CI->getArgOperand(0), Builder.getInt1(false)});
3453 Rep =
3454 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
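// Editor's note (illustrative): avx512.mask.lzcnt.d.128 becomes
//   %c = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x, i1 false)
// followed by the mask select above.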
3455 } else if (Name.starts_with("avx512.mask.psll")) {
3456 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3457 bool IsVariable = Name[16] == 'v';
3458 char Size = Name[16] == '.' ? Name[17]
3459 : Name[17] == '.' ? Name[18]
3460 : Name[18] == '.' ? Name[19]
3461 : Name[20];
3462
3463 Intrinsic::ID IID;
3464 if (IsVariable && Name[17] != '.') {
3465 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3466 IID = Intrinsic::x86_avx2_psllv_q;
3467 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3468 IID = Intrinsic::x86_avx2_psllv_q_256;
3469 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3470 IID = Intrinsic::x86_avx2_psllv_d;
3471 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3472 IID = Intrinsic::x86_avx2_psllv_d_256;
3473 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3474 IID = Intrinsic::x86_avx512_psllv_w_128;
3475 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3476 IID = Intrinsic::x86_avx512_psllv_w_256;
3477 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3478 IID = Intrinsic::x86_avx512_psllv_w_512;
3479 else
3480 llvm_unreachable("Unexpected size");
3481 } else if (Name.ends_with(".128")) {
3482 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3483 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3484 : Intrinsic::x86_sse2_psll_d;
3485 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3486 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3487 : Intrinsic::x86_sse2_psll_q;
3488 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3489 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3490 : Intrinsic::x86_sse2_psll_w;
3491 else
3492 llvm_unreachable("Unexpected size");
3493 } else if (Name.ends_with(".256")) {
3494 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3495 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3496 : Intrinsic::x86_avx2_psll_d;
3497 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3498 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3499 : Intrinsic::x86_avx2_psll_q;
3500 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3501 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3502 : Intrinsic::x86_avx2_psll_w;
3503 else
3504 llvm_unreachable("Unexpected size");
3505 } else {
3506 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3507 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3508 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3509 : Intrinsic::x86_avx512_psll_d_512;
3510 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3511 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3512 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3513 : Intrinsic::x86_avx512_psll_q_512;
3514 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3515 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3516 : Intrinsic::x86_avx512_psll_w_512;
3517 else
3518 llvm_unreachable("Unexpected size");
3519 }
3520
3521 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
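// Editor's note (illustrative): for "avx512.mask.psll.d.128", Name[16] is '.'
// so Size is Name[17] == 'd'; neither the immediate nor the variable form
// matches, and the ".128" suffix selects Intrinsic::x86_sse2_psll_d above.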
3522 } else if (Name.starts_with("avx512.mask.psrl")) {
3523 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3524 bool IsVariable = Name[16] == 'v';
3525 char Size = Name[16] == '.' ? Name[17]
3526 : Name[17] == '.' ? Name[18]
3527 : Name[18] == '.' ? Name[19]
3528 : Name[20];
3529
3530 Intrinsic::ID IID;
3531 if (IsVariable && Name[17] != '.') {
3532 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3533 IID = Intrinsic::x86_avx2_psrlv_q;
3534 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3535 IID = Intrinsic::x86_avx2_psrlv_q_256;
3536 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3537 IID = Intrinsic::x86_avx2_psrlv_d;
3538 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3539 IID = Intrinsic::x86_avx2_psrlv_d_256;
3540 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3541 IID = Intrinsic::x86_avx512_psrlv_w_128;
3542 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3543 IID = Intrinsic::x86_avx512_psrlv_w_256;
3544 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3545 IID = Intrinsic::x86_avx512_psrlv_w_512;
3546 else
3547 llvm_unreachable("Unexpected size");
3548 } else if (Name.ends_with(".128")) {
3549 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3550 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3551 : Intrinsic::x86_sse2_psrl_d;
3552 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3553 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3554 : Intrinsic::x86_sse2_psrl_q;
3555 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3556 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3557 : Intrinsic::x86_sse2_psrl_w;
3558 else
3559 llvm_unreachable("Unexpected size");
3560 } else if (Name.ends_with(".256")) {
3561 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3562 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3563 : Intrinsic::x86_avx2_psrl_d;
3564 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3565 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3566 : Intrinsic::x86_avx2_psrl_q;
3567 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3568 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3569 : Intrinsic::x86_avx2_psrl_w;
3570 else
3571 llvm_unreachable("Unexpected size");
3572 } else {
3573 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3574 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3575 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3576 : Intrinsic::x86_avx512_psrl_d_512;
3577 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3578 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3579 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3580 : Intrinsic::x86_avx512_psrl_q_512;
3581 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3582 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3583 : Intrinsic::x86_avx512_psrl_w_512;
3584 else
3585 llvm_unreachable("Unexpected size");
3586 }
3587
3588 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3589 } else if (Name.starts_with("avx512.mask.psra")) {
3590 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3591 bool IsVariable = Name[16] == 'v';
3592 char Size = Name[16] == '.' ? Name[17]
3593 : Name[17] == '.' ? Name[18]
3594 : Name[18] == '.' ? Name[19]
3595 : Name[20];
3596
3597 Intrinsic::ID IID;
3598 if (IsVariable && Name[17] != '.') {
3599 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3600 IID = Intrinsic::x86_avx2_psrav_d;
3601 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3602 IID = Intrinsic::x86_avx2_psrav_d_256;
3603 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3604 IID = Intrinsic::x86_avx512_psrav_w_128;
3605 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3606 IID = Intrinsic::x86_avx512_psrav_w_256;
3607 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3608 IID = Intrinsic::x86_avx512_psrav_w_512;
3609 else
3610 llvm_unreachable("Unexpected size");
3611 } else if (Name.ends_with(".128")) {
3612 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3613 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3614 : Intrinsic::x86_sse2_psra_d;
3615 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3616 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3617 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3618 : Intrinsic::x86_avx512_psra_q_128;
3619 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3620 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3621 : Intrinsic::x86_sse2_psra_w;
3622 else
3623 llvm_unreachable("Unexpected size");
3624 } else if (Name.ends_with(".256")) {
3625 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3626 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3627 : Intrinsic::x86_avx2_psra_d;
3628 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3629 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3630 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3631 : Intrinsic::x86_avx512_psra_q_256;
3632 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3633 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3634 : Intrinsic::x86_avx2_psra_w;
3635 else
3636 llvm_unreachable("Unexpected size");
3637 } else {
3638 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3639 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3640 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3641 : Intrinsic::x86_avx512_psra_d_512;
3642 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3643 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3644 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3645 : Intrinsic::x86_avx512_psra_q_512;
3646 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3647 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3648 : Intrinsic::x86_avx512_psra_w_512;
3649 else
3650 llvm_unreachable("Unexpected size");
3651 }
3652
3653 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3654 } else if (Name.starts_with("avx512.mask.move.s")) {
3655 Rep = upgradeMaskedMove(Builder, *CI);
3656 } else if (Name.starts_with("avx512.cvtmask2")) {
3657 Rep = upgradeMaskToInt(Builder, *CI);
3658 } else if (Name.ends_with(".movntdqa")) {
3659 MDNode *Node = MDNode::get(
3660 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3661
3662 LoadInst *LI = Builder.CreateAlignedLoad(
3663 CI->getType(), CI->getArgOperand(0),
3664 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3665 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3666 Rep = LI;
3667 } else if (Name.starts_with("fma.vfmadd.") ||
3668 Name.starts_with("fma.vfmsub.") ||
3669 Name.starts_with("fma.vfnmadd.") ||
3670 Name.starts_with("fma.vfnmsub.")) {
3671 bool NegMul = Name[6] == 'n';
3672 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3673 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3674
3675 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3676 CI->getArgOperand(2)};
3677
3678 if (IsScalar) {
3679 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3680 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3681 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3682 }
3683
3684 if (NegMul && !IsScalar)
3685 Ops[0] = Builder.CreateFNeg(Ops[0]);
3686 if (NegMul && IsScalar)
3687 Ops[1] = Builder.CreateFNeg(Ops[1]);
3688 if (NegAcc)
3689 Ops[2] = Builder.CreateFNeg(Ops[2]);
3690
3691 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3692
3693 if (IsScalar)
3694 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
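// Editor's note (illustrative): the packed fma.vfnmadd.ps (NegMul set)
// lowers to
//   %na = fneg <4 x float> %a
//   %r  = call <4 x float> @llvm.fma.v4f32(<4 x float> %na, <4 x float> %b, <4 x float> %c)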
3695 } else if (Name.starts_with("fma4.vfmadd.s")) {
3696 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3697 CI->getArgOperand(2)};
3698
3699 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3700 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3701 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3702
3703 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
3704
3705 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3706 Rep, (uint64_t)0);
3707 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3708 Name.starts_with("avx512.maskz.vfmadd.s") ||
3709 Name.starts_with("avx512.mask3.vfmadd.s") ||
3710 Name.starts_with("avx512.mask3.vfmsub.s") ||
3711 Name.starts_with("avx512.mask3.vfnmsub.s")) {
3712 bool IsMask3 = Name[11] == '3';
3713 bool IsMaskZ = Name[11] == 'z';
3714 // Drop the "avx512.mask." to make it easier.
3715 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3716 bool NegMul = Name[2] == 'n';
3717 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3718
3719 Value *A = CI->getArgOperand(0);
3720 Value *B = CI->getArgOperand(1);
3721 Value *C = CI->getArgOperand(2);
3722
3723 if (NegMul && (IsMask3 || IsMaskZ))
3724 A = Builder.CreateFNeg(A);
3725 if (NegMul && !(IsMask3 || IsMaskZ))
3726 B = Builder.CreateFNeg(B);
3727 if (NegAcc)
3728 C = Builder.CreateFNeg(C);
3729
3730 A = Builder.CreateExtractElement(A, (uint64_t)0);
3731 B = Builder.CreateExtractElement(B, (uint64_t)0);
3732 C = Builder.CreateExtractElement(C, (uint64_t)0);
3733
3734 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3735 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3736 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3737
3738 Intrinsic::ID IID;
3739 if (Name.back() == 'd')
3740 IID = Intrinsic::x86_avx512_vfmadd_f64;
3741 else
3742 IID = Intrinsic::x86_avx512_vfmadd_f32;
3743 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3744 } else {
3745 Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C});
3746 }
3747
3748 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
3749 : IsMask3 ? C
3750 : A;
3751
3752 // For Mask3 with NegAcc, we need to create a new extractelement that
3753 // avoids the negation above.
3754 if (NegAcc && IsMask3)
3755 PassThru =
3756 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3757
3758 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3759 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3760 (uint64_t)0);
3761 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3762 Name.starts_with("avx512.mask.vfnmadd.p") ||
3763 Name.starts_with("avx512.mask.vfnmsub.p") ||
3764 Name.starts_with("avx512.mask3.vfmadd.p") ||
3765 Name.starts_with("avx512.mask3.vfmsub.p") ||
3766 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3767 Name.starts_with("avx512.maskz.vfmadd.p")) {
3768 bool IsMask3 = Name[11] == '3';
3769 bool IsMaskZ = Name[11] == 'z';
3770 // Drop the "avx512.mask." to make it easier.
3771 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3772 bool NegMul = Name[2] == 'n';
3773 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3774
3775 Value *A = CI->getArgOperand(0);
3776 Value *B = CI->getArgOperand(1);
3777 Value *C = CI->getArgOperand(2);
3778
3779 if (NegMul && (IsMask3 || IsMaskZ))
3780 A = Builder.CreateFNeg(A);
3781 if (NegMul && !(IsMask3 || IsMaskZ))
3782 B = Builder.CreateFNeg(B);
3783 if (NegAcc)
3784 C = Builder.CreateFNeg(C);
3785
3786 if (CI->arg_size() == 5 &&
3787 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3788 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3789 Intrinsic::ID IID;
3790 // Check the character before ".512" in the string.
3791 if (Name[Name.size() - 5] == 's')
3792 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3793 else
3794 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3795
3796 Rep = Builder.CreateIntrinsic(IID, {}, {A, B, C, CI->getArgOperand(4)});
3797 } else {
3798 Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C});
3799 }
3800
3801 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3802 : IsMask3 ? CI->getArgOperand(2)
3803 : CI->getArgOperand(0);
3804
3805 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3806 } else if (Name.starts_with("fma.vfmsubadd.p")) {
3807 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3808 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3809 Intrinsic::ID IID;
3810 if (VecWidth == 128 && EltWidth == 32)
3811 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3812 else if (VecWidth == 256 && EltWidth == 32)
3813 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3814 else if (VecWidth == 128 && EltWidth == 64)
3815 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3816 else if (VecWidth == 256 && EltWidth == 64)
3817 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3818 else
3819 llvm_unreachable("Unexpected intrinsic");
3820
3821 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3822 CI->getArgOperand(2)};
3823 Ops[2] = Builder.CreateFNeg(Ops[2]);
3824 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3825 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3826 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3827 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3828 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
3829 bool IsMask3 = Name[11] == '3';
3830 bool IsMaskZ = Name[11] == 'z';
3831 // Drop the "avx512.mask." to make it easier.
3832 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3833 bool IsSubAdd = Name[3] == 's';
3834 if (CI->arg_size() == 5) {
3835 Intrinsic::ID IID;
3836 // Check the character before ".512" in the string.
3837 if (Name[Name.size() - 5] == 's')
3838 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3839 else
3840 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3841
3842 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3843 CI->getArgOperand(2), CI->getArgOperand(4)};
3844 if (IsSubAdd)
3845 Ops[2] = Builder.CreateFNeg(Ops[2]);
3846
3847 Rep = Builder.CreateIntrinsic(IID, {}, Ops);
3848 } else {
3849 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3850
3851 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3852 CI->getArgOperand(2)};
3853
3854 Function *FMA = Intrinsic::getOrInsertDeclaration(
3855 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
3856 Value *Odd = Builder.CreateCall(FMA, Ops);
3857 Ops[2] = Builder.CreateFNeg(Ops[2]);
3858 Value *Even = Builder.CreateCall(FMA, Ops);
3859
3860 if (IsSubAdd)
3861 std::swap(Even, Odd);
3862
3863 SmallVector<int, 32> Idxs(NumElts);
3864 for (int i = 0; i != NumElts; ++i)
3865 Idxs[i] = i + (i % 2) * NumElts;
3866
3867 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
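// Editor's note (illustrative): for <4 x float> the interleave mask is
// <0, 5, 2, 7>; even result lanes come from Even (a*b - c in the fmaddsub
// case) and odd lanes from Odd (a*b + c).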
3868 }
3869
3870 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3871 : IsMask3 ? CI->getArgOperand(2)
3872 : CI->getArgOperand(0);
3873
3874 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3875 } else if (Name.starts_with("avx512.mask.pternlog.") ||
3876 Name.starts_with("avx512.maskz.pternlog.")) {
3877 bool ZeroMask = Name[11] == 'z';
3878 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3879 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3880 Intrinsic::ID IID;
3881 if (VecWidth == 128 && EltWidth == 32)
3882 IID = Intrinsic::x86_avx512_pternlog_d_128;
3883 else if (VecWidth == 256 && EltWidth == 32)
3884 IID = Intrinsic::x86_avx512_pternlog_d_256;
3885 else if (VecWidth == 512 && EltWidth == 32)
3886 IID = Intrinsic::x86_avx512_pternlog_d_512;
3887 else if (VecWidth == 128 && EltWidth == 64)
3888 IID = Intrinsic::x86_avx512_pternlog_q_128;
3889 else if (VecWidth == 256 && EltWidth == 64)
3890 IID = Intrinsic::x86_avx512_pternlog_q_256;
3891 else if (VecWidth == 512 && EltWidth == 64)
3892 IID = Intrinsic::x86_avx512_pternlog_q_512;
3893 else
3894 llvm_unreachable("Unexpected intrinsic");
3895
3896 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3897 CI->getArgOperand(2), CI->getArgOperand(3)};
3898 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3899 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3900 : CI->getArgOperand(0);
3901 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3902 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
3903 Name.starts_with("avx512.maskz.vpmadd52")) {
3904 bool ZeroMask = Name[11] == 'z';
3905 bool High = Name[20] == 'h' || Name[21] == 'h';
3906 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3907 Intrinsic::ID IID;
3908 if (VecWidth == 128 && !High)
3909 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3910 else if (VecWidth == 256 && !High)
3911 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3912 else if (VecWidth == 512 && !High)
3913 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3914 else if (VecWidth == 128 && High)
3915 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3916 else if (VecWidth == 256 && High)
3917 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3918 else if (VecWidth == 512 && High)
3919 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3920 else
3921 llvm_unreachable("Unexpected intrinsic");
3922
3923 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3924 CI->getArgOperand(2)};
3925 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3926 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3927 : CI->getArgOperand(0);
3928 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3929 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
3930 Name.starts_with("avx512.mask.vpermt2var.") ||
3931 Name.starts_with("avx512.maskz.vpermt2var.")) {
3932 bool ZeroMask = Name[11] == 'z';
3933 bool IndexForm = Name[17] == 'i';
3934 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3935 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
3936 Name.starts_with("avx512.maskz.vpdpbusd.") ||
3937 Name.starts_with("avx512.mask.vpdpbusds.") ||
3938 Name.starts_with("avx512.maskz.vpdpbusds.")) {
3939 bool ZeroMask = Name[11] == 'z';
3940 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3941 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3942 Intrinsic::ID IID;
3943 if (VecWidth == 128 && !IsSaturating)
3944 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3945 else if (VecWidth == 256 && !IsSaturating)
3946 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3947 else if (VecWidth == 512 && !IsSaturating)
3948 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3949 else if (VecWidth == 128 && IsSaturating)
3950 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3951 else if (VecWidth == 256 && IsSaturating)
3952 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3953 else if (VecWidth == 512 && IsSaturating)
3954 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3955 else
3956 llvm_unreachable("Unexpected intrinsic");
3957
3958 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3959 CI->getArgOperand(2)};
3960 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3961 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3962 : CI->getArgOperand(0);
3963 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3964 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
3965 Name.starts_with("avx512.maskz.vpdpwssd.") ||
3966 Name.starts_with("avx512.mask.vpdpwssds.") ||
3967 Name.starts_with("avx512.maskz.vpdpwssds.")) {
3968 bool ZeroMask = Name[11] == 'z';
3969 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3970 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3971 Intrinsic::ID IID;
3972 if (VecWidth == 128 && !IsSaturating)
3973 IID = Intrinsic::x86_avx512_vpdpwssd_128;
3974 else if (VecWidth == 256 && !IsSaturating)
3975 IID = Intrinsic::x86_avx512_vpdpwssd_256;
3976 else if (VecWidth == 512 && !IsSaturating)
3977 IID = Intrinsic::x86_avx512_vpdpwssd_512;
3978 else if (VecWidth == 128 && IsSaturating)
3979 IID = Intrinsic::x86_avx512_vpdpwssds_128;
3980 else if (VecWidth == 256 && IsSaturating)
3981 IID = Intrinsic::x86_avx512_vpdpwssds_256;
3982 else if (VecWidth == 512 && IsSaturating)
3983 IID = Intrinsic::x86_avx512_vpdpwssds_512;
3984 else
3985 llvm_unreachable("Unexpected intrinsic");
3986
3987 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3988 CI->getArgOperand(2)};
3989 Rep = Builder.CreateIntrinsic(IID, {}, Args);
3990 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3991 : CI->getArgOperand(0);
3992 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3993 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3994 Name == "addcarry.u32" || Name == "addcarry.u64" ||
3995 Name == "subborrow.u32" || Name == "subborrow.u64") {
3996 Intrinsic::ID IID;
3997 if (Name[0] == 'a' && Name.back() == '2')
3998 IID = Intrinsic::x86_addcarry_32;
3999 else if (Name[0] == 'a' && Name.back() == '4')
4000 IID = Intrinsic::x86_addcarry_64;
4001 else if (Name[0] == 's' && Name.back() == '2')
4002 IID = Intrinsic::x86_subborrow_32;
4003 else if (Name[0] == 's' && Name.back() == '4')
4004 IID = Intrinsic::x86_subborrow_64;
4005 else
4006 llvm_unreachable("Unexpected intrinsic");
4007
4008 // Make a call with 3 operands.
4009 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4010 CI->getArgOperand(2)};
4011 Value *NewCall = Builder.CreateIntrinsic(IID, {}, Args);
4012
4013 // Extract the second result and store it.
4014 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4015 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4016 // Replace the original call result with the first result of the new call.
4017 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4018
4019 CI->replaceAllUsesWith(CF);
4020 Rep = nullptr;
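// Editor's note (illustrative): addcarry.u32 maps to
//   %res = call { i8, i32 } @llvm.x86.addcarry.32(i8 %cf, i32 %a, i32 %b)
// where the i32 sum is stored through operand 3 and the i8 carry-out
// replaces the original call's result.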
4021 } else if (Name.starts_with("avx512.mask.") &&
4022 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4023 // Rep will be updated by the call in the condition.
4024 }
4025
4026 return Rep;
4027}
4028
4029 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4030 Function *F, IRBuilder<> &Builder) {
4031 Intrinsic::ID NewID =
4032 StringSwitch<Intrinsic::ID>(Name)
4033 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4034 .Case("sve.fcvtnt.bf16f32", Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4035 .Default(Intrinsic::not_intrinsic);
4036 if (NewID == Intrinsic::not_intrinsic)
4037 llvm_unreachable("Unhandled Intrinsic!");
4038
4039 SmallVector<Value *, 3> Args(CI->args());
4040
4041 // The original intrinsics incorrectly used a predicate based on the smallest
4042 // element type rather than the largest.
4043 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4044 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4045
4046 if (Args[1]->getType() != BadPredTy)
4047 llvm_unreachable("Unexpected predicate type!");
4048
4049 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4050 BadPredTy, Args[1]);
4051 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
4052 GoodPredTy, Args[1]);
4053
4054 return Builder.CreateIntrinsic(NewID, {}, Args, /*FMFSource=*/nullptr,
4055 CI->getName());
4056}
4057
4058 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4059 IRBuilder<> &Builder) {
4060 if (Name == "mve.vctp64.old") {
4061 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4062 // correct type.
4063 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4064 CI->getArgOperand(0),
4065 /*FMFSource=*/nullptr, CI->getName());
4066 Value *C1 = Builder.CreateIntrinsic(
4067 Intrinsic::arm_mve_pred_v2i,
4068 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4069 return Builder.CreateIntrinsic(
4070 Intrinsic::arm_mve_pred_i2v,
4071 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4072 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4073 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4074 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4075 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4076 Name ==
4077 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4078 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4079 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4080 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4081 Name ==
4082 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4083 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4084 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4085 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4086 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4087 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4088 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4089 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4090 std::vector<Type *> Tys;
4091 unsigned ID = CI->getIntrinsicID();
4092 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4093 switch (ID) {
4094 case Intrinsic::arm_mve_mull_int_predicated:
4095 case Intrinsic::arm_mve_vqdmull_predicated:
4096 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4097 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4098 break;
4099 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4100 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4101 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4102 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4103 V2I1Ty};
4104 break;
4105 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4106 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4107 CI->getOperand(1)->getType(), V2I1Ty};
4108 break;
4109 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4110 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4111 CI->getOperand(2)->getType(), V2I1Ty};
4112 break;
4113 case Intrinsic::arm_cde_vcx1q_predicated:
4114 case Intrinsic::arm_cde_vcx1qa_predicated:
4115 case Intrinsic::arm_cde_vcx2q_predicated:
4116 case Intrinsic::arm_cde_vcx2qa_predicated:
4117 case Intrinsic::arm_cde_vcx3q_predicated:
4118 case Intrinsic::arm_cde_vcx3qa_predicated:
4119 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4120 break;
4121 default:
4122 llvm_unreachable("Unhandled Intrinsic!");
4123 }
4124
4125 std::vector<Value *> Ops;
4126 for (Value *Op : CI->args()) {
4127 Type *Ty = Op->getType();
4128 if (Ty->getScalarSizeInBits() == 1) {
4129 Value *C1 = Builder.CreateIntrinsic(
4130 Intrinsic::arm_mve_pred_v2i,
4131 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4132 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4133 }
4134 Ops.push_back(Op);
4135 }
4136
4137 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4138 CI->getName());
4139 }
4140 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4141}
4142
4143// These are expected to have the arguments:
4144// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4145//
4146// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4147//
4148 static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4149 Function *F, IRBuilder<> &Builder) {
4150 AtomicRMWInst::BinOp RMWOp =
4151 StringSwitch<AtomicRMWInst::BinOp>(Name)
4152 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4153 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4154 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4155 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4156 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4157 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4158 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4159 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4160 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4161 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4162 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax);
4163
4164 unsigned NumOperands = CI->getNumOperands();
4165 if (NumOperands < 3) // Malformed bitcode.
4166 return nullptr;
4167
4168 Value *Ptr = CI->getArgOperand(0);
4169 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4170 if (!PtrTy) // Malformed.
4171 return nullptr;
4172
4173 Value *Val = CI->getArgOperand(1);
4174 if (Val->getType() != CI->getType()) // Malformed.
4175 return nullptr;
4176
4177 ConstantInt *OrderArg = nullptr;
4178 bool IsVolatile = false;
4179
4180 // These should have 5 arguments (plus the callee). A separate version of the
4181 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4182 if (NumOperands > 3)
4183 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4184
4185 // Ignore scope argument at 3
4186
4187 if (NumOperands > 5) {
4188 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4189 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4190 }
4191
4192 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4193 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4194 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4195 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4196 Order = AtomicOrdering::SequentiallyConsistent;
4197
4198 LLVMContext &Ctx = F->getContext();
4199
4200 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4201 Type *RetTy = CI->getType();
4202 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4203 if (VT->getElementType()->isIntegerTy(16)) {
4204 VectorType *AsBF16 =
4205 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4206 Val = Builder.CreateBitCast(Val, AsBF16);
4207 }
4208 }
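// Editor's note (illustrative): the old ds.fadd.v2bf16 form took <2 x i16>,
// so the operand is bitcast to <2 x bfloat> here and the atomicrmw result is
// cast back to the original return type below.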
4209
4210 // The scope argument never really worked correctly. Use agent as the most
4211 // conservative option which should still always produce the instruction.
4212 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4213 AtomicRMWInst *RMW =
4214 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4215
4216 unsigned AddrSpace = PtrTy->getAddressSpace();
4217 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4218 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4219 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4220 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4221 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4222 }
4223
4224 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4225 MDBuilder MDB(F->getContext());
4226 MDNode *RangeNotPrivate =
4227 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
4228 APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4229 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4230 }
4231
4232 if (IsVolatile)
4233 RMW->setVolatile(true);
4234
4235 return Builder.CreateBitCast(RMW, RetTy);
4236}
4237
4238/// Helper to unwrap intrinsic call MetadataAsValue operands.
4239template <typename MDType>
4240static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
4241 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4242 return dyn_cast<MDType>(MAV->getMetadata());
4243 return nullptr;
4244}
4245
4246/// Convert debug intrinsic calls to non-instruction debug records.
4247/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4248/// \p CI - The debug intrinsic call.
4249 static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4250 DbgRecord *DR = nullptr;
4251 if (Name == "label") {
4252 DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
4253 } else if (Name == "assign") {
4254 DR = new DbgVariableRecord(
4255 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4256 unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
4257 unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
4258 CI->getDebugLoc());
4259 } else if (Name == "declare") {
4260 DR = new DbgVariableRecord(
4261 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4262 unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
4263 DbgVariableRecord::LocationType::Declare);
4264 } else if (Name == "addr") {
4265 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4266 DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
4267 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4268 DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
4269 unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
4270 CI->getDebugLoc());
4271 } else if (Name == "value") {
4272 // An old version of dbg.value had an extra offset argument.
4273 unsigned VarOp = 1;
4274 unsigned ExprOp = 2;
4275 if (CI->arg_size() == 4) {
4276 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4277 // Nonzero offset dbg.values get dropped without a replacement.
4278 if (!Offset || !Offset->isZeroValue())
4279 return;
4280 VarOp = 2;
4281 ExprOp = 3;
4282 }
4283 DR = new DbgVariableRecord(
4284 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
4285 unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
4286 }
4287 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4288 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
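// Editor's note (illustrative): a call such as
//   call void @llvm.dbg.value(metadata i32 %x, metadata !10, metadata !DIExpression())
// is rebuilt as a non-instruction record printed roughly as
//   #dbg_value(i32 %x, !10, !DIExpression(), !dbg-loc)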
4289}
4290
4291/// Upgrade a call to an old intrinsic. All argument and return casting must be
4292/// provided to seamlessly integrate with existing context.
4293 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4294 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4295 // checks the callee's function type matches. It's likely we need to handle
4296 // type changes here.
4297 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4298 if (!F)
4299 return;
4300
4301 LLVMContext &C = CI->getContext();
4302 IRBuilder<> Builder(C);
4303 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4304
4305 if (!NewFn) {
4306 bool FallthroughToDefaultUpgrade = false;
4307 // Get the Function's name.
4308 StringRef Name = F->getName();
4309
4310 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4311 Name = Name.substr(5);
4312
4313 bool IsX86 = Name.consume_front("x86.");
4314 bool IsNVVM = Name.consume_front("nvvm.");
4315 bool IsAArch64 = Name.consume_front("aarch64.");
4316 bool IsARM = Name.consume_front("arm.");
4317 bool IsAMDGCN = Name.consume_front("amdgcn.");
4318 bool IsDbg = Name.consume_front("dbg.");
4319 Value *Rep = nullptr;
4320
4321 if (!IsX86 && Name == "stackprotectorcheck") {
4322 Rep = nullptr;
4323 } else if (IsNVVM) {
4324 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4325 } else if (IsX86) {
4326 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4327 } else if (IsAArch64) {
4328 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4329 } else if (IsARM) {
4330 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4331 } else if (IsAMDGCN) {
4332 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4333 } else if (IsDbg) {
4334 // We might have decided we don't want the new format after all between
4335 // first requesting the upgrade and now; skip the conversion if that is
4336 // the case, and check here to see if the intrinsic needs to be upgraded
4337 // normally.
4338 if (!CI->getModule()->IsNewDbgInfoFormat) {
4339 bool NeedsUpgrade =
4340 upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false);
4341 if (!NeedsUpgrade)
4342 return;
4343 FallthroughToDefaultUpgrade = true;
4344 } else {
4345 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4346 }
4347 } else {
4348 llvm_unreachable("Unknown function for CallBase upgrade.");
4349 }
4350
4351 if (!FallthroughToDefaultUpgrade) {
4352 if (Rep)
4353 CI->replaceAllUsesWith(Rep);
4354 CI->eraseFromParent();
4355 return;
4356 }
4357 }
4358
4359 const auto &DefaultCase = [&]() -> void {
4360 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4361 // Handle generic mangling change.
4362 assert(
4363 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4364 "Unknown function for CallBase upgrade and isn't just a name change");
4365 CI->setCalledFunction(NewFn);
4366 return;
4367 }
4368
4369 // This must be an upgrade from a named to a literal struct.
4370 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4371 assert(OldST != NewFn->getReturnType() &&
4372 "Return type must have changed");
4373 assert(OldST->getNumElements() ==
4374 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4375 "Must have same number of elements");
4376
4377 SmallVector<Value *> Args(CI->args());
4378 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4379 NewCI->setAttributes(CI->getAttributes());
4380 Value *Res = PoisonValue::get(OldST);
4381 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4382 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4383 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4384 }
4385 CI->replaceAllUsesWith(Res);
4386 CI->eraseFromParent();
4387 return;
4388 }
4389
4390 // We're probably about to produce something invalid. Let the verifier catch
4391 // it instead of dying here.
4392 CI->setCalledOperand(
4393 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4394 return;
4395 };
4396 CallInst *NewCall = nullptr;
4397 switch (NewFn->getIntrinsicID()) {
4398 default: {
4399 DefaultCase();
4400 return;
4401 }
4402 case Intrinsic::arm_neon_vst1:
4403 case Intrinsic::arm_neon_vst2:
4404 case Intrinsic::arm_neon_vst3:
4405 case Intrinsic::arm_neon_vst4:
4406 case Intrinsic::arm_neon_vst2lane:
4407 case Intrinsic::arm_neon_vst3lane:
4408 case Intrinsic::arm_neon_vst4lane: {
4409 SmallVector<Value *, 4> Args(CI->args());
4410 NewCall = Builder.CreateCall(NewFn, Args);
4411 break;
4412 }
4413 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4414 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4415 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4416 LLVMContext &Ctx = F->getParent()->getContext();
4417 SmallVector<Value *, 4> Args(CI->args());
4418 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4419 cast<ConstantInt>(Args[3])->getZExtValue());
4420 NewCall = Builder.CreateCall(NewFn, Args);
4421 break;
4422 }
4423 case Intrinsic::aarch64_sve_ld3_sret:
4424 case Intrinsic::aarch64_sve_ld4_sret:
4425 case Intrinsic::aarch64_sve_ld2_sret: {
4426 StringRef Name = F->getName();
4427 Name = Name.substr(5);
4428 unsigned N = StringSwitch<unsigned>(Name)
4429 .StartsWith("aarch64.sve.ld2", 2)
4430 .StartsWith("aarch64.sve.ld3", 3)
4431 .StartsWith("aarch64.sve.ld4", 4)
4432 .Default(0);
4433 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4434 unsigned MinElts = RetTy->getMinNumElements() / N;
4435 SmallVector<Value *, 2> Args(CI->args());
4436 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4437 Value *Ret = llvm::PoisonValue::get(RetTy);
4438 for (unsigned I = 0; I < N; I++) {
4439 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4440 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4441 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4442 }
4443 NewCall = dyn_cast<CallInst>(Ret);
4444 break;
4445 }
4446
4447 case Intrinsic::coro_end: {
4448 SmallVector<Value *, 3> Args(CI->args());
4449 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4450 NewCall = Builder.CreateCall(NewFn, Args);
4451 break;
4452 }
4453
4454 case Intrinsic::vector_extract: {
4455 StringRef Name = F->getName();
4456 Name = Name.substr(5); // Strip llvm
4457 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4458 DefaultCase();
4459 return;
4460 }
4461 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4462 unsigned MinElts = RetTy->getMinNumElements();
4463 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4464 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4465 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4466 break;
4467 }
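// Worked example (illustrative): for a call such as
//   @llvm.aarch64.sve.tuple.get.nxv4i32(<vscale x 8 x i32> %tuple, i32 1)
// the result type gives MinElts == 4, so the upgraded call is roughly
//   @llvm.vector.extract.nxv4i32(<vscale x 8 x i32> %tuple, i64 4)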
4468
4469 case Intrinsic::vector_insert: {
4470 StringRef Name = F->getName();
4471 Name = Name.substr(5);
4472 if (!Name.starts_with("aarch64.sve.tuple")) {
4473 DefaultCase();
4474 return;
4475 }
4476 if (Name.starts_with("aarch64.sve.tuple.set")) {
4477 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4478 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4479 Value *NewIdx =
4480 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4481 NewCall = Builder.CreateCall(
4482 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4483 break;
4484 }
4485 if (Name.starts_with("aarch64.sve.tuple.create")) {
4486 unsigned N = StringSwitch<unsigned>(Name)
4487 .StartsWith("aarch64.sve.tuple.create2", 2)
4488 .StartsWith("aarch64.sve.tuple.create3", 3)
4489 .StartsWith("aarch64.sve.tuple.create4", 4)
4490 .Default(0);
4491 assert(N > 1 && "Create is expected to be between 2-4");
4492 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4493 Value *Ret = llvm::PoisonValue::get(RetTy);
4494 unsigned MinElts = RetTy->getMinNumElements() / N;
4495 for (unsigned I = 0; I < N; I++) {
4496 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4497 Value *V = CI->getArgOperand(I);
4498 Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4499 }
4500 NewCall = dyn_cast<CallInst>(Ret);
4501 }
4502 break;
4503 }
4504
4505 case Intrinsic::arm_neon_bfdot:
4506 case Intrinsic::arm_neon_bfmmla:
4507 case Intrinsic::arm_neon_bfmlalb:
4508 case Intrinsic::arm_neon_bfmlalt:
4509 case Intrinsic::aarch64_neon_bfdot:
4510 case Intrinsic::aarch64_neon_bfmmla:
4511 case Intrinsic::aarch64_neon_bfmlalb:
4512 case Intrinsic::aarch64_neon_bfmlalt: {
4513 SmallVector<Value *, 3> Args;
4514 assert(CI->arg_size() == 3 &&
4515 "Mismatch between function args and call args");
4516 size_t OperandWidth =
4517 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4518 assert((OperandWidth == 64 || OperandWidth == 128) &&
4519 "Unexpected operand width");
4520 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4521 auto Iter = CI->args().begin();
4522 Args.push_back(*Iter++);
4523 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4524 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4525 NewCall = Builder.CreateCall(NewFn, Args);
4526 break;
4527 }
4528
4529 case Intrinsic::bitreverse:
4530 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4531 break;
4532
4533 case Intrinsic::ctlz:
4534 case Intrinsic::cttz:
4535 assert(CI->arg_size() == 1 &&
4536 "Mismatch between function args and call args");
4537 NewCall =
4538 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4539 break;
4540
4541 case Intrinsic::objectsize: {
4542 Value *NullIsUnknownSize =
4543 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4544 Value *Dynamic =
4545 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4546 NewCall = Builder.CreateCall(
4547 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4548 break;
4549 }
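// Illustrative example: an old two-argument call
//   @llvm.objectsize.i64.p0(ptr %p, i1 false)
// is padded to the current four-argument form
//   @llvm.objectsize.i64.p0(ptr %p, i1 false, i1 false, i1 false)
// with null-is-unknown-size and dynamic both defaulting to false.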
4550
4551 case Intrinsic::ctpop:
4552 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4553 break;
4554
4555 case Intrinsic::convert_from_fp16:
4556 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4557 break;
4558
4559 case Intrinsic::dbg_value: {
4560 StringRef Name = F->getName();
4561 Name = Name.substr(5); // Strip llvm.
4562 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4563 if (Name.starts_with("dbg.addr")) {
4564 DIExpression *Expr = cast<DIExpression>(
4565 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4566 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4567 NewCall =
4568 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4569 MetadataAsValue::get(C, Expr)});
4570 break;
4571 }
4572
4573 // Upgrade from the old version that had an extra offset argument.
4574 assert(CI->arg_size() == 4);
4575 // Drop nonzero offsets instead of attempting to upgrade them.
4576 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4577 if (Offset->isZeroValue()) {
4578 NewCall = Builder.CreateCall(
4579 NewFn,
4580 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4581 break;
4582 }
4583 CI->eraseFromParent();
4584 return;
4585 }
4586
4587 case Intrinsic::ptr_annotation:
4588 // Upgrade from versions that lacked the annotation attribute argument.
4589 if (CI->arg_size() != 4) {
4590 DefaultCase();
4591 return;
4592 }
4593
4594 // Create a new call with an added null annotation attribute argument.
4595 NewCall =
4596 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4597 CI->getArgOperand(2), CI->getArgOperand(3),
4598 Constant::getNullValue(Builder.getPtrTy())});
4599 NewCall->takeName(CI);
4600 CI->replaceAllUsesWith(NewCall);
4601 CI->eraseFromParent();
4602 return;
4603
4604 case Intrinsic::var_annotation:
4605 // Upgrade from versions that lacked the annotation attribute argument.
4606 if (CI->arg_size() != 4) {
4607 DefaultCase();
4608 return;
4609 }
4610 // Create a new call with an added null annotation attribute argument.
4611 NewCall =
4612 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4613 CI->getArgOperand(2), CI->getArgOperand(3),
4614 Constant::getNullValue(Builder.getPtrTy())});
4615 NewCall->takeName(CI);
4616 CI->replaceAllUsesWith(NewCall);
4617 CI->eraseFromParent();
4618 return;
4619
4620 case Intrinsic::riscv_aes32dsi:
4621 case Intrinsic::riscv_aes32dsmi:
4622 case Intrinsic::riscv_aes32esi:
4623 case Intrinsic::riscv_aes32esmi:
4624 case Intrinsic::riscv_sm4ks:
4625 case Intrinsic::riscv_sm4ed: {
4626 // The last argument to these intrinsics used to be i8 and changed to i32.
4627 // The type overload for sm4ks and sm4ed was removed.
4628 Value *Arg2 = CI->getArgOperand(2);
4629 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4630 return;
4631
4632 Value *Arg0 = CI->getArgOperand(0);
4633 Value *Arg1 = CI->getArgOperand(1);
4634 if (CI->getType()->isIntegerTy(64)) {
4635 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4636 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4637 }
4638
4639 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4640 cast<ConstantInt>(Arg2)->getZExtValue());
4641
4642 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4643 Value *Res = NewCall;
4644 if (Res->getType() != CI->getType())
4645 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4646 NewCall->takeName(CI);
4647 CI->replaceAllUsesWith(Res);
4648 CI->eraseFromParent();
4649 return;
4650 }
4651 case Intrinsic::riscv_sha256sig0:
4652 case Intrinsic::riscv_sha256sig1:
4653 case Intrinsic::riscv_sha256sum0:
4654 case Intrinsic::riscv_sha256sum1:
4655 case Intrinsic::riscv_sm3p0:
4656 case Intrinsic::riscv_sm3p1: {
4657 // These intrinsics used to be overloaded on i32/i64; the i64 type overload
4658 // was removed, so i64 values are truncated to i32 and the result extended.
4659 if (!CI->getType()->isIntegerTy(64))
4660 return;
4661
4662 Value *Arg =
4663 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4664
4665 NewCall = Builder.CreateCall(NewFn, Arg);
4666 Value *Res =
4667 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4668 NewCall->takeName(CI);
4669 CI->replaceAllUsesWith(Res);
4670 CI->eraseFromParent();
4671 return;
4672 }
4673
4674 case Intrinsic::x86_xop_vfrcz_ss:
4675 case Intrinsic::x86_xop_vfrcz_sd:
4676 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4677 break;
4678
4679 case Intrinsic::x86_xop_vpermil2pd:
4680 case Intrinsic::x86_xop_vpermil2ps:
4681 case Intrinsic::x86_xop_vpermil2pd_256:
4682 case Intrinsic::x86_xop_vpermil2ps_256: {
4683 SmallVector<Value *, 4> Args(CI->args());
4684 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4685 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4686 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4687 NewCall = Builder.CreateCall(NewFn, Args);
4688 break;
4689 }
4690
4691 case Intrinsic::x86_sse41_ptestc:
4692 case Intrinsic::x86_sse41_ptestz:
4693 case Intrinsic::x86_sse41_ptestnzc: {
4694 // The arguments for these intrinsics used to be v4f32, and changed
4695 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4696 // So, the only thing required is a bitcast for both arguments.
4697 // First, check the arguments have the old type.
4698 Value *Arg0 = CI->getArgOperand(0);
4699 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4700 return;
4701
4702 // Old intrinsic, add bitcasts
4703 Value *Arg1 = CI->getArgOperand(1);
4704
4705 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4706
4707 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4708 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4709
4710 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4711 break;
4712 }
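// Illustrative example: an old-style call
//   @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %b)
// becomes
//   %cast = bitcast <4 x float> %a to <2 x i64>
//   %cast1 = bitcast <4 x float> %b to <2 x i64>
//   @llvm.x86.sse41.ptestz(<2 x i64> %cast, <2 x i64> %cast1)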
4713
4714 case Intrinsic::x86_rdtscp: {
4715 // This used to take one argument. If we have no arguments, it is already
4716 // upgraded.
4717 if (CI->getNumOperands() == 0)
4718 return;
4719
4720 NewCall = Builder.CreateCall(NewFn);
4721 // Extract the second result and store it.
4722 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4723 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
4724 // Replace the original call result with the first result of the new call.
4725 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4726
4727 NewCall->takeName(CI);
4728 CI->replaceAllUsesWith(TSC);
4729 CI->eraseFromParent();
4730 return;
4731 }
4732
4733 case Intrinsic::x86_sse41_insertps:
4734 case Intrinsic::x86_sse41_dppd:
4735 case Intrinsic::x86_sse41_dpps:
4736 case Intrinsic::x86_sse41_mpsadbw:
4737 case Intrinsic::x86_avx_dp_ps_256:
4738 case Intrinsic::x86_avx2_mpsadbw: {
4739 // Need to truncate the last argument from i32 to i8 -- this argument models
4740 // an inherently 8-bit immediate operand to these x86 instructions.
4741 SmallVector<Value *, 4> Args(CI->args());
4742
4743 // Replace the last argument with a trunc.
4744 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4745 NewCall = Builder.CreateCall(NewFn, Args);
4746 break;
4747 }
4748
4749 case Intrinsic::x86_avx512_mask_cmp_pd_128:
4750 case Intrinsic::x86_avx512_mask_cmp_pd_256:
4751 case Intrinsic::x86_avx512_mask_cmp_pd_512:
4752 case Intrinsic::x86_avx512_mask_cmp_ps_128:
4753 case Intrinsic::x86_avx512_mask_cmp_ps_256:
4754 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4755 SmallVector<Value *, 4> Args(CI->args());
4756 unsigned NumElts =
4757 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4758 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4759
4760 NewCall = Builder.CreateCall(NewFn, Args);
4761 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4762
4763 NewCall->takeName(CI);
4764 CI->replaceAllUsesWith(Res);
4765 CI->eraseFromParent();
4766 return;
4767 }
4768
4769 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4770 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4771 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4772 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4773 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4774 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4775 SmallVector<Value *, 4> Args(CI->args());
4776 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4777 if (NewFn->getIntrinsicID() ==
4778 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4779 Args[1] = Builder.CreateBitCast(
4780 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4781
4782 NewCall = Builder.CreateCall(NewFn, Args);
4783 Value *Res = Builder.CreateBitCast(
4784 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4785
4786 NewCall->takeName(CI);
4787 CI->replaceAllUsesWith(Res);
4788 CI->eraseFromParent();
4789 return;
4790 }
4791 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4792 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4793 case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
4794 SmallVector<Value *, 4> Args(CI->args());
4795 unsigned NumElts =
4796 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4797 Args[1] = Builder.CreateBitCast(
4798 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4799 Args[2] = Builder.CreateBitCast(
4800 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4801
4802 NewCall = Builder.CreateCall(NewFn, Args);
4803 break;
4804 }
4805
4806 case Intrinsic::thread_pointer: {
4807 NewCall = Builder.CreateCall(NewFn, {});
4808 break;
4809 }
4810
4811 case Intrinsic::memcpy:
4812 case Intrinsic::memmove:
4813 case Intrinsic::memset: {
4814 // We have to make sure that the call signature is what we're expecting.
4815 // We only want to change the old signatures by removing the alignment arg:
4816 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
4817 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
4818 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4819 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
4820 // Note: i8*'s in the above can be any pointer type
4821 if (CI->arg_size() != 5) {
4822 DefaultCase();
4823 return;
4824 }
4825 // Remove alignment argument (3), and add alignment attributes to the
4826 // dest/src pointers.
4827 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4828 CI->getArgOperand(2), CI->getArgOperand(4)};
4829 NewCall = Builder.CreateCall(NewFn, Args);
4830 AttributeList OldAttrs = CI->getAttributes();
4831 AttributeList NewAttrs = AttributeList::get(
4832 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4833 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4834 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4835 NewCall->setAttributes(NewAttrs);
4836 auto *MemCI = cast<MemIntrinsic>(NewCall);
4837 // All mem intrinsics support dest alignment.
4838 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4839 MemCI->setDestAlignment(Align->getMaybeAlignValue());
4840 // Memcpy/Memmove also support source alignment.
4841 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4842 MTI->setSourceAlignment(Align->getMaybeAlignValue());
4843 break;
4844 }
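// Illustrative example: the old five-argument form
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 8, i1 0)
// becomes a four-argument call with the alignment moved onto the pointers:
//   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %d, ptr align 8 %s, i64 %n,
//                                    i1 false)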
4845 }
4846 assert(NewCall && "Should have either set this variable or returned through "
4847 "the default case");
4848 NewCall->takeName(CI);
4849 CI->replaceAllUsesWith(NewCall);
4850 CI->eraseFromParent();
4851}
4852
4853void llvm::UpgradeCallsToIntrinsic(Function *F) {
4854 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4855
4856 // Check if this function should be upgraded and get the replacement function
4857 // if there is one.
4858 Function *NewFn;
4859 if (UpgradeIntrinsicFunction(F, NewFn)) {
4860 // Replace all users of the old function with the new function or new
4861 // instructions. This is not a range loop because the call is deleted.
4862 for (User *U : make_early_inc_range(F->users()))
4863 if (CallBase *CB = dyn_cast<CallBase>(U))
4864 UpgradeIntrinsicCall(CB, NewFn);
4865
4866 // Remove old function, no longer used, from the module.
4867 F->eraseFromParent();
4868 }
4869}
4870
4871MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4872 const unsigned NumOperands = MD.getNumOperands();
4873 if (NumOperands == 0)
4874 return &MD; // Invalid, punt to a verifier error.
4875
4876 // Check if the tag uses struct-path aware TBAA format.
4877 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4878 return &MD;
4879
4880 auto &Context = MD.getContext();
4881 if (NumOperands == 3) {
4882 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4883 MDNode *ScalarType = MDNode::get(Context, Elts);
4884 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4885 Metadata *Elts2[] = {ScalarType, ScalarType,
4886 ConstantAsMetadata::get(
4887 Constant::getNullValue(Type::getInt64Ty(Context))),
4888 MD.getOperand(2)};
4889 return MDNode::get(Context, Elts2);
4890 }
4891 // Create a MDNode <MD, MD, offset 0>
4892 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4893 Type::getInt64Ty(Context)))};
4894 return MDNode::get(Context, Elts);
4895}
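// Illustrative example: a scalar-format TBAA tag such as !1 = !{!"int", !0}
// is wrapped into the struct-path aware form !2 = !{!1, !1, i64 0}, reusing
// the old node as both base and access type with an offset of zero.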
4896
4897Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4898 Instruction *&Temp) {
4899 if (Opc != Instruction::BitCast)
4900 return nullptr;
4901
4902 Temp = nullptr;
4903 Type *SrcTy = V->getType();
4904 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4905 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4906 LLVMContext &Context = V->getContext();
4907
4908 // We have no information about target data layout, so we assume that
4909 // the maximum pointer size is 64bit.
4910 Type *MidTy = Type::getInt64Ty(Context);
4911 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4912
4913 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4914 }
4915
4916 return nullptr;
4917}
4918
4919Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4920 if (Opc != Instruction::BitCast)
4921 return nullptr;
4922
4923 Type *SrcTy = C->getType();
4924 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4925 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4926 LLVMContext &Context = C->getContext();
4927
4928 // We have no information about target data layout, so we assume that
4929 // the maximum pointer size is 64bit.
4930 Type *MidTy = Type::getInt64Ty(Context);
4931
4932 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4933 DestTy);
4934 }
4935
4936 return nullptr;
4937}
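// Illustrative example: an old cross-address-space expression such as
//   bitcast (ptr addrspace(1) @g to ptr)
// is upgraded to the equivalent pair
//   inttoptr (i64 ptrtoint (ptr addrspace(1) @g to i64) to ptr)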
4938
4939/// Check the debug info version number; if it is outdated, drop the debug
4940/// info. Return true if the module is modified.
4941bool llvm::UpgradeDebugInfo(Module &M) {
4942 if (DisableAutoUpgradeDebugInfo)
4943 return false;
4944
4945 // We need to get metadata before the module is verified (i.e., getModuleFlag
4946 // makes assumptions that we haven't verified yet). Carefully extract the flag
4947 // from the metadata.
4948 unsigned Version = 0;
4949 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
4950 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
4951 if (Flag->getNumOperands() < 3)
4952 return false;
4953 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
4954 return K->getString() == "Debug Info Version";
4955 return false;
4956 });
4957 if (OpIt != ModFlags->op_end()) {
4958 const MDOperand &ValOp = (*OpIt)->getOperand(2);
4959 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
4960 Version = CI->getZExtValue();
4961 }
4962 }
4963
4964 if (Version == DEBUG_METADATA_VERSION) {
4965 bool BrokenDebugInfo = false;
4966 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4967 report_fatal_error("Broken module found, compilation aborted!");
4968 if (!BrokenDebugInfo)
4969 // Everything is ok.
4970 return false;
4971 else {
4972 // Diagnose malformed debug info.
4973 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4974 M.getContext().diagnose(Diag);
4975 }
4976 }
4977 bool Modified = StripDebugInfo(M);
4978 if (Modified && Version != DEBUG_METADATA_VERSION) {
4979 // Diagnose a version mismatch.
4980 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4981 M.getContext().diagnose(DiagVersion);
4982 }
4983 return Modified;
4984}
4985
4986/// This checks for the objc retain/release marker, which should be upgraded.
4987/// It returns true if the module is modified.
4988static bool upgradeRetainReleaseMarker(Module &M) {
4989 bool Changed = false;
4990 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4991 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4992 if (ModRetainReleaseMarker) {
4993 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4994 if (Op) {
4995 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4996 if (ID) {
4997 SmallVector<StringRef, 4> ValueComp;
4998 ID->getString().split(ValueComp, "#");
4999 if (ValueComp.size() == 2) {
5000 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5001 ID = MDString::get(M.getContext(), NewValue);
5002 }
5003 M.addModuleFlag(Module::Error, MarkerKey, ID);
5004 M.eraseNamedMetadata(ModRetainReleaseMarker);
5005 Changed = true;
5006 }
5007 }
5008 }
5009 return Changed;
5010}
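// Illustrative example: a marker string such as
//   "mov fp, fp # marker for objc_retainAutoreleaseReturnValue"
// taken from the named metadata is re-emitted under the same key as a module
// flag reading
//   "mov fp, fp ; marker for objc_retainAutoreleaseReturnValue"
// with the '#' separator replaced by ';'.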
5011
5012void llvm::UpgradeARCRuntime(Module &M) {
5013 // This lambda converts normal function calls to ARC runtime functions to
5014 // intrinsic calls.
5015 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5016 llvm::Intrinsic::ID IntrinsicFunc) {
5017 Function *Fn = M.getFunction(OldFunc);
5018
5019 if (!Fn)
5020 return;
5021
5022 Function *NewFn =
5023 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5024
5025 for (User *U : make_early_inc_range(Fn->users())) {
5026 CallInst *CI = dyn_cast<CallInst>(U);
5027 if (!CI || CI->getCalledFunction() != Fn)
5028 continue;
5029
5030 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5031 FunctionType *NewFuncTy = NewFn->getFunctionType();
5032 SmallVector<Value *, 4> Args;
5033
5034 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5035 // value to the return type of the old function.
5036 if (NewFuncTy->getReturnType() != CI->getType() &&
5037 !CastInst::castIsValid(Instruction::BitCast, CI,
5038 NewFuncTy->getReturnType()))
5039 continue;
5040
5041 bool InvalidCast = false;
5042
5043 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5044 Value *Arg = CI->getArgOperand(I);
5045
5046 // Bitcast argument to the parameter type of the new function if it's
5047 // not a variadic argument.
5048 if (I < NewFuncTy->getNumParams()) {
5049 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5050 // to the parameter type of the new function.
5051 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5052 NewFuncTy->getParamType(I))) {
5053 InvalidCast = true;
5054 break;
5055 }
5056 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5057 }
5058 Args.push_back(Arg);
5059 }
5060
5061 if (InvalidCast)
5062 continue;
5063
5064 // Create a call instruction that calls the new function.
5065 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5066 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5067 NewCall->takeName(CI);
5068
5069 // Bitcast the return value back to the type of the old call.
5070 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5071
5072 if (!CI->use_empty())
5073 CI->replaceAllUsesWith(NewRetVal);
5074 CI->eraseFromParent();
5075 }
5076
5077 if (Fn->use_empty())
5078 Fn->eraseFromParent();
5079 };
5080
5081 // Unconditionally convert a call to "clang.arc.use" to a call to
5082 // "llvm.objc.clang.arc.use".
5083 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5084
5085 // Upgrade the retain release marker. If there is no need to upgrade
5086 // the marker, that means either the module is already new enough to contain
5087 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
5088 if (!upgradeRetainReleaseMarker(M))
5089 return;
5090
5091 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5092 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5093 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5094 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5095 {"objc_autoreleaseReturnValue",
5096 llvm::Intrinsic::objc_autoreleaseReturnValue},
5097 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5098 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5099 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5100 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5101 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5102 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5103 {"objc_release", llvm::Intrinsic::objc_release},
5104 {"objc_retain", llvm::Intrinsic::objc_retain},
5105 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5106 {"objc_retainAutoreleaseReturnValue",
5107 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5108 {"objc_retainAutoreleasedReturnValue",
5109 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5110 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5111 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5112 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5113 {"objc_unsafeClaimAutoreleasedReturnValue",
5114 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5115 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5116 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5117 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5118 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5119 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5120 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5121 {"objc_arc_annotation_topdown_bbstart",
5122 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5123 {"objc_arc_annotation_topdown_bbend",
5124 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5125 {"objc_arc_annotation_bottomup_bbstart",
5126 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5127 {"objc_arc_annotation_bottomup_bbend",
5128 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5129
5130 for (auto &I : RuntimeFuncs)
5131 UpgradeToIntrinsic(I.first, I.second);
5132}
5133
5134bool llvm::UpgradeModuleFlags(Module &M) {
5135 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5136 if (!ModFlags)
5137 return false;
5138
5139 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5140 bool HasSwiftVersionFlag = false;
5141 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5142 uint32_t SwiftABIVersion;
5143 auto Int8Ty = Type::getInt8Ty(M.getContext());
5144 auto Int32Ty = Type::getInt32Ty(M.getContext());
5145
5146 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5147 MDNode *Op = ModFlags->getOperand(I);
5148 if (Op->getNumOperands() != 3)
5149 continue;
5150 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5151 if (!ID)
5152 continue;
5153 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5154 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5155 Type::getInt32Ty(M.getContext()), B)),
5156 MDString::get(M.getContext(), ID->getString()),
5157 Op->getOperand(2)};
5158 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5159 Changed = true;
5160 };
5161
5162 if (ID->getString() == "Objective-C Image Info Version")
5163 HasObjCFlag = true;
5164 if (ID->getString() == "Objective-C Class Properties")
5165 HasClassProperties = true;
5166 // Upgrade PIC from Error/Max to Min.
5167 if (ID->getString() == "PIC Level") {
5168 if (auto *Behavior =
5169 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5170 uint64_t V = Behavior->getLimitedValue();
5171 if (V == Module::Error || V == Module::Max)
5172 SetBehavior(Module::Min);
5173 }
5174 }
5175 // Upgrade "PIE Level" from Error to Max.
5176 if (ID->getString() == "PIE Level")
5177 if (auto *Behavior =
5178 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5179 if (Behavior->getLimitedValue() == Module::Error)
5180 SetBehavior(Module::Max);
5181
5182 // Upgrade branch protection and return address signing module flags. The
5183 // module flag behavior for these flags was Error and is now Min.
5184 if (ID->getString() == "branch-target-enforcement" ||
5185 ID->getString().starts_with("sign-return-address")) {
5186 if (auto *Behavior =
5187 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5188 if (Behavior->getLimitedValue() == Module::Error) {
5189 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5190 Metadata *Ops[3] = {
5191 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5192 Op->getOperand(1), Op->getOperand(2)};
5193 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5194 Changed = true;
5195 }
5196 }
5197 }
5198
5199 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
5200 // section name so that llvm-lto will not complain about mismatched module
5201 // flags that are functionally the same.
5202 if (ID->getString() == "Objective-C Image Info Section") {
5203 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5204 SmallVector<StringRef, 4> ValueComp;
5205 Value->getString().split(ValueComp, " ");
5206 if (ValueComp.size() != 1) {
5207 std::string NewValue;
5208 for (auto &S : ValueComp)
5209 NewValue += S.str();
5210 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5211 MDString::get(M.getContext(), NewValue)};
5212 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5213 Changed = true;
5214 }
5215 }
5216 }
5217
5218 // IRUpgrader turns an i32 "Objective-C Garbage Collection" flag into an i8
5219 // value. If the higher bits are set, it adds new module flags for Swift info.
5220 if (ID->getString() == "Objective-C Garbage Collection") {
5221 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5222 if (Md) {
5223 assert(Md->getValue() && "Expected non-empty metadata");
5224 auto Type = Md->getValue()->getType();
5225 if (Type == Int8Ty)
5226 continue;
5227 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5228 if ((Val & 0xff) != Val) {
5229 HasSwiftVersionFlag = true;
5230 SwiftABIVersion = (Val & 0xff00) >> 8;
5231 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5232 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5233 }
5234 Metadata *Ops[3] = {
5235 ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
5236 Op->getOperand(1),
5237 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5238 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5239 Changed = true;
5240 }
5241 }
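// Worked example (illustrative): a packed value of 0x05040700 yields
// SwiftMajorVersion = 5, SwiftMinorVersion = 4, SwiftABIVersion = 7, and the
// flag itself is narrowed to its low byte, 0x00.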
5242
5243 if (ID->getString() == "amdgpu_code_object_version") {
5244 Metadata *Ops[3] = {
5245 Op->getOperand(0),
5246 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5247 Op->getOperand(2)};
5248 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5249 Changed = true;
5250 }
5251 }
5252
5253 // "Objective-C Class Properties" is recently added for Objective-C. We
5254 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5255 // flag of value 0, so we can correclty downgrade this flag when trying to
5256 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5257 // this module flag.
5258 if (HasObjCFlag && !HasClassProperties) {
5259 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5260 (uint32_t)0);
5261 Changed = true;
5262 }
5263
5264 if (HasSwiftVersionFlag) {
5265 M.addModuleFlag(Module::Error, "Swift ABI Version",
5266 SwiftABIVersion);
5267 M.addModuleFlag(Module::Error, "Swift Major Version",
5268 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5269 M.addModuleFlag(Module::Error, "Swift Minor Version",
5270 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5271 Changed = true;
5272 }
5273
5274 return Changed;
5275}
5276
5277void llvm::UpgradeSectionAttributes(Module &M) {
5278 auto TrimSpaces = [](StringRef Section) -> std::string {
5279 SmallVector<StringRef, 5> Components;
5280 Section.split(Components, ',');
5281
5282 SmallString<32> Buffer;
5283 raw_svector_ostream OS(Buffer);
5284
5285 for (auto Component : Components)
5286 OS << ',' << Component.trim();
5287
5288 return std::string(OS.str().substr(1));
5289 };
5290
5291 for (auto &GV : M.globals()) {
5292 if (!GV.hasSection())
5293 continue;
5294
5295 StringRef Section = GV.getSection();
5296
5297 if (!Section.starts_with("__DATA, __objc_catlist"))
5298 continue;
5299
5300 // __DATA, __objc_catlist, regular, no_dead_strip
5301 // __DATA,__objc_catlist,regular,no_dead_strip
5302 GV.setSection(TrimSpaces(Section));
5303 }
5304}
5305
5306namespace {
5307// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5308// callsites within a function that did not also have the strictfp attribute.
5309// Since 10.0, if strict FP semantics are needed within a function, the
5310// function must have the strictfp attribute and all calls within the function
5311// must also have the strictfp attribute. This latter restriction is
5312// necessary to prevent unwanted libcall simplification when a function is
5313// being cloned (such as for inlining).
5314//
5315// The "dangling" strictfp attribute usage was only used to prevent constant
5316// folding and other libcall simplification. The nobuiltin attribute on the
5317// callsite has the same effect.
5318struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5319 StrictFPUpgradeVisitor() = default;
5320
5321 void visitCallBase(CallBase &Call) {
5322 if (!Call.isStrictFP())
5323 return;
5324 if (isa<ConstrainedFPIntrinsic>(&Call))
5325 return;
5326 // If we get here, the caller doesn't have the strictfp attribute
5327 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5328 Call.removeFnAttr(Attribute::StrictFP);
5329 Call.addFnAttr(Attribute::NoBuiltin);
5330 }
5331};
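// Illustrative example: in a caller that lacks the strictfp attribute, a
// callsite "call double @sin(double %x) strictfp" is rewritten as
// "call double @sin(double %x) nobuiltin".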
5332
5333/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
5334struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5335 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5336 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
5337
5338 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
5339 if (!RMW.isFloatingPointOperation())
5340 return;
5341
5342 MDNode *Empty = MDNode::get(RMW.getContext(), {});
5343 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
5344 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
5345 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
5346 }
5347};
5348} // namespace
5349
5350void llvm::UpgradeFunctionAttributes(Function &F) {
5351 // If a function definition doesn't have the strictfp attribute,
5352 // convert any callsite strictfp attributes to nobuiltin.
5353 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5354 StrictFPUpgradeVisitor SFPV;
5355 SFPV.visit(F);
5356 }
5357
5358 // Remove all incompatible attributes from the function.
5359 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
5360 F.getReturnType(), F.getAttributes().getRetAttrs()));
5361 for (auto &Arg : F.args())
5362 Arg.removeAttrs(
5363 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
5364
5365 // Older versions of LLVM treated an "implicit-section-name" attribute
5366 // similarly to directly setting the section on a Function.
5367 if (Attribute A = F.getFnAttribute("implicit-section-name");
5368 A.isValid() && A.isStringAttribute()) {
5369 F.setSection(A.getValueAsString());
5370 F.removeFnAttr("implicit-section-name");
5371 }
5372
5373 if (!F.empty()) {
5374 // For some reason this is called twice, and the first time is before any
5375 // instructions are loaded into the body.
5376
5377 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
5378 A.isValid()) {
5379
5380 if (A.getValueAsBool()) {
5381 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5382 Visitor.visit(F);
5383 }
5384
5385 // We will leave behind dead attribute uses on external declarations, but
5386 // clang never added these to declarations anyway.
5387 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
5388 }
5389 }
5390}
5391
5392static bool isOldLoopArgument(Metadata *MD) {
5393 auto *T = dyn_cast_or_null<MDTuple>(MD);
5394 if (!T)
5395 return false;
5396 if (T->getNumOperands() < 1)
5397 return false;
5398 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5399 if (!S)
5400 return false;
5401 return S->getString().starts_with("llvm.vectorizer.");
5402}
5403
5404static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5405 StringRef OldPrefix = "llvm.vectorizer.";
5406 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5407
5408 if (OldTag == "llvm.vectorizer.unroll")
5409 return MDString::get(C, "llvm.loop.interleave.count");
5410
5411 return MDString::get(
5412 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5413 .str());
5414}
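// Illustrative example: !{!"llvm.vectorizer.width", i32 4} is renamed to
// !{!"llvm.loop.vectorize.width", i32 4}; the special case is
// "llvm.vectorizer.unroll", which maps to "llvm.loop.interleave.count".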
5415
5416static Metadata *upgradeLoopArgument(Metadata *MD) {
5417 auto *T = dyn_cast_or_null<MDTuple>(MD);
5418 if (!T)
5419 return MD;
5420 if (T->getNumOperands() < 1)
5421 return MD;
5422 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5423 if (!OldTag)
5424 return MD;
5425 if (!OldTag->getString().starts_with("llvm.vectorizer."))
5426 return MD;
5427
5428 // This has an old tag. Upgrade it.
5429 SmallVector<Metadata *, 8> Ops;
5430 Ops.reserve(T->getNumOperands());
5431 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5432 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5433 Ops.push_back(T->getOperand(I));
5434
5435 return MDTuple::get(T->getContext(), Ops);
5436}
5437
5438MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5439 auto *T = dyn_cast<MDTuple>(&N);
5440 if (!T)
5441 return &N;
5442
5443 if (none_of(T->operands(), isOldLoopArgument))
5444 return &N;
5445
5446 SmallVector<Metadata *, 8> Ops;
5447 Ops.reserve(T->getNumOperands());
5448 for (Metadata *MD : T->operands())
5449 Ops.push_back(upgradeLoopArgument(MD));
5450
5451 return MDTuple::get(T->getContext(), Ops);
5452}
5453
5454std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5455 Triple T(TT);
5456 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
5457 // the address space of globals to 1. This does not apply to SPIRV Logical.
5458 if (((T.isAMDGPU() && !T.isAMDGCN()) ||
5459 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&
5460 !DL.contains("-G") && !DL.starts_with("G")) {
5461 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5462 }
5463
5464 if (T.isLoongArch64() || T.isRISCV64()) {
5465 // Make i32 a native type for 64-bit LoongArch and RISC-V.
5466 auto I = DL.find("-n64-");
5467 if (I != StringRef::npos)
5468 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5469 return DL.str();
5470 }
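// Illustrative example: for riscv64, a layout segment "-n64-" becomes
// "-n32:64-", declaring i32 a native integer type alongside i64.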
5471
5472 std::string Res = DL.str();
5473 // AMDGCN data layout upgrades.
5474 if (T.isAMDGCN()) {
5475 // Define address spaces for constants.
5476 if (!DL.contains("-G") && !DL.starts_with("G"))
5477 Res.append(Res.empty() ? "G1" : "-G1");
5478
5479 // Add missing non-integral declarations.
5480 // This goes before adding new address spaces to prevent incoherent string
5481 // values.
5482 if (!DL.contains("-ni") && !DL.starts_with("ni"))
5483 Res.append("-ni:7:8:9");
5484 // Update ni:7 to ni:7:8:9.
5485 if (DL.ends_with("ni:7"))
5486 Res.append(":8:9");
5487 if (DL.ends_with("ni:7:8"))
5488 Res.append(":9");
5489
5490 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5491 // resources). An empty data layout has already been upgraded to G1 by now.
5492 if (!DL.contains("-p7") && !DL.starts_with("p7"))
5493 Res.append("-p7:160:256:256:32");
5494 if (!DL.contains("-p8") && !DL.starts_with("p8"))
5495 Res.append("-p8:128:128");
5496 if (!DL.contains("-p9") && !DL.starts_with("p9"))
5497 Res.append("-p9:192:256:256:32");
5498
5499 return Res;
5500 }
5501
5502 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
5503 // If the datalayout matches the expected format, add pointer size address
5504 // spaces to the datalayout.
5505 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
5506 if (!DL.contains(AddrSpaces)) {
5507 SmallVector<StringRef, 4> Groups;
5508 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
5509 if (R.match(Res, &Groups))
5510 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5511 }
5512 };
5513
5514 // AArch64 data layout upgrades.
5515 if (T.isAArch64()) {
5516 // Add "-Fn32"
5517 if (!DL.empty() && !DL.contains("-Fn32"))
5518 Res.append("-Fn32");
5519 AddPtr32Ptr64AddrSpaces();
5520 return Res;
5521 }
5522
5523 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
5524 T.isWasm()) {
5525 // Mips64 with o32 ABI did not add "-i128:128".
5526 // Add "-i128:128"
5527 std::string I64 = "-i64:64";
5528 std::string I128 = "-i128:128";
5529 if (!StringRef(Res).contains(I128)) {
5530 size_t Pos = Res.find(I64);
5531 if (Pos != size_t(-1))
5532 Res.insert(Pos + I64.size(), I128);
5533 }
5534 return Res;
5535 }
5536
5537 if (!T.isX86())
5538 return Res;
5539
5540 AddPtr32Ptr64AddrSpaces();
5541
5542 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5543 // for i128 operations prior to this being reflected in the data layout, and
5544 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5545 // boundaries, so although this is a breaking change, the upgrade is expected
5546 // to fix more IR than it breaks.
5547 // Intel MCU is an exception and uses 4-byte-alignment.
5548 if (!T.isOSIAMCU()) {
5549 std::string I128 = "-i128:128";
5550 if (StringRef Ref = Res; !Ref.contains(I128)) {
5551 SmallVector<StringRef, 4> Groups;
5552 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5553 if (R.match(Res, &Groups))
5554 Res = (Groups[1] + I128 + Groups[3]).str();
5555 }
5556 }
5557
5558 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5559 // Raising the alignment is safe because Clang did not produce f80 values in
5560 // the MSVC environment before this upgrade was added.
5561 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5562 StringRef Ref = Res;
5563 auto I = Ref.find("-f80:32-");
5564 if (I != StringRef::npos)
5565 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5566 }
5567
5568 return Res;
5569}
5570
5571void llvm::UpgradeAttributes(AttrBuilder &B) {
5572 StringRef FramePointer;
5573 Attribute A = B.getAttribute("no-frame-pointer-elim");
5574 if (A.isValid()) {
5575 // The value can be "true" or "false".
5576 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5577 B.removeAttribute("no-frame-pointer-elim");
5578 }
5579 if (B.contains("no-frame-pointer-elim-non-leaf")) {
5580 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5581 if (FramePointer != "all")
5582 FramePointer = "non-leaf";
5583 B.removeAttribute("no-frame-pointer-elim-non-leaf");
5584 }
5585 if (!FramePointer.empty())
5586 B.addAttribute("frame-pointer", FramePointer);
5587
5588 A = B.getAttribute("null-pointer-is-valid");
5589 if (A.isValid()) {
5590 // The value can be "true" or "false".
5591 bool NullPointerIsValid = A.getValueAsString() == "true";
5592 B.removeAttribute("null-pointer-is-valid");
5593 if (NullPointerIsValid)
5594 B.addAttribute(Attribute::NullPointerIsValid);
5595 }
5596}
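// Illustrative examples: "no-frame-pointer-elim"="true" becomes
// "frame-pointer"="all"; "no-frame-pointer-elim-non-leaf" alone becomes
// "frame-pointer"="non-leaf"; and "null-pointer-is-valid"="true" becomes the
// enum attribute null_pointer_is_valid.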
5597
5598void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5599 // clang.arc.attachedcall bundles are now required to have an operand.
5600 // If they don't, it's okay to drop them entirely: when there is an operand,
5601 // the "attachedcall" is meaningful and required, but without an operand,
5602 // it's just a marker NOP. Dropping it merely prevents an optimization.
5603 erase_if(Bundles, [&](OperandBundleDef &OBD) {
5604 return OBD.getTag() == "clang.arc.attachedcall" &&
5605 OBD.inputs().empty();
5606 });
5607}
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:91
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:75
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDType * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
Definition: AutoUpgrade.cpp:55
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:59
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
@ Default
Definition: DwarfDebug.cpp:87
This file contains constants used for implementing Dwarf debug support.
std::string Name
uint64_t Size
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define R2(n)
This file contains the declarations for metadata subclasses.
uint64_t High
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
raw_pwrite_stream & OS
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Class to represent array types.
Definition: DerivedTypes.h:395
Type * getElementType() const
Definition: DerivedTypes.h:408
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
Definition: Instructions.h:841
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ FAdd
*p = old + v
Definition: Instructions.h:741
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
Definition: Instructions.h:752
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
Definition: Instructions.h:748
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
bool isFloatingPointOperation() const
Definition: Instructions.h:882
AttributeSet getFnAttrs() const
The function attributes are returned.
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1112
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1341
Value * getCalledOperand() const
Definition: InstrTypes.h:1334
void setAttributes(AttributeList A)
Set the attributes for this call.
Definition: InstrTypes.h:1420
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1286
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1199
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1277
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1377
unsigned arg_size() const
Definition: InstrTypes.h:1284
AttributeList getAttributes() const
Return the attributes for this call.
Definition: InstrTypes.h:1417
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1380
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1672
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1312
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:532
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2307
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:2253
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2293
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
Definition: Constants.h:157
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1378
static ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
Definition: Constants.cpp:1522
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
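A tiny sketch of the two factory helpers above; for an integer or vector-of-integer type this yields 0 and ~0 of that type:

#include <utility>
#include "llvm/IR/Constants.h"

using namespace llvm;

static std::pair<Constant *, Constant *> zeroAndAllOnes(Type *Ty) {
  return {Constant::getNullValue(Ty), Constant::getAllOnesValue(Ty)};
}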
DWARF expression.
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
This class represents an Operation in the Expression.
Records a position in IR for a source label (DILabel).
Base class for non-instruction debug metadata records that have positions within IR.
Record of a variable value-assignment, aka a non-instruction representation of the dbg.value intrinsic.
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
Class to represent function types.
Definition: DerivedTypes.h:105
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:137
Type * getReturnType() const
Definition: DerivedTypes.h:126
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:173
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intrinsic if the function is not an intrinsic, or if the pointer is null.
Definition: Function.h:251
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Function.cpp:458
size_t arg_size() const
Definition: Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
Argument * getArg(unsigned i) const
Definition: Function.h:886
LinkageTypes getLinkage() const
Definition: GlobalValue.h:547
Type * getValueType() const
Definition: GlobalValue.h:297
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:480
Value * CreateFSub(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1595
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2511
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:530
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2562
Value * CreateFDiv(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1633
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Definition: IRBuilder.h:1080
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2106
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2499
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:558
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1815
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1576
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
Definition: IRBuilder.cpp:1163
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2555
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:546
Value * CreateICmpSGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2298
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1053
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2045
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
ConstantInt * getInt8(uint8_t C)
Get a constant 8-bit value.
Definition: IRBuilder.h:495
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2093
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Definition: IRBuilder.h:540
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1733
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
Value * CreateICmpSLE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2306
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1757
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2270
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2152
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
Definition: IRBuilder.h:1798
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1459
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2033
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2533
LLVMContext & getContext() const
Definition: IRBuilder.h:195
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1518
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:566
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1370
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:490
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Definition: IRBuilder.h:573
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
Definition: IRBuilder.h:2588
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1862
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1540
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:588
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2282
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2225
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:199
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1834
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2128
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1499
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1562
Value * CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2290
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2380
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1614
Value * CreateFNeg(Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1742
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:535
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2157
Type * getBFloatTy()
Fetch the type representing a 16-bit brain floating point value.
Definition: IRBuilder.h:568
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1404
This provides a uniform API for creating instructions and inserting them into a basic block: either at the end of a BasicBlock, or at a specific iterator location in a block.
Definition: IRBuilder.h:2705
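A compact sketch tying several of the IRBuilder calls above together; the block and value names are assumptions made for illustration:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Load an i8 from Src, widen it to i32, double it, and store it to Dst.
static void widenAndStore(BasicBlock *BB, Value *Src, Value *Dst) {
  IRBuilder<> Builder(BB->getContext());
  Builder.SetInsertPoint(BB); // append new instructions at the end of BB

  Value *Byte = Builder.CreateLoad(Builder.getInt8Ty(), Src, "byte");
  Value *Wide = Builder.CreateZExt(Byte, Builder.getInt32Ty(), "wide");
  Value *Twice = Builder.CreateShl(Wide, Builder.getInt32(1), "twice");
  Builder.CreateAlignedStore(Twice, Dst, MaybeAlign(4));
}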
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitCallBase(CallBase &I)
Definition: InstVisitor.h:270
RetTy visitAtomicRMWInst(AtomicRMWInst &I)
Definition: InstVisitor.h:172
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
Definition: Instruction.cpp:68
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:74
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
Definition: Instructions.h:176
MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition: MDBuilder.cpp:95
Metadata node.
Definition: Metadata.h:1073
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1434
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1549
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1440
LLVMContext & getContext() const
Definition: Metadata.h:1237
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:895
A single uniqued string.
Definition: Metadata.h:724
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:606
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1506
Metadata wrapper in the Value hierarchy.
Definition: Metadata.h:180
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:103
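A sketch combining the metadata classes above: wrap a string in an MDTuple and attach it to an instruction. The metadata kind name is made up for illustration:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

static void tagInstruction(Instruction &I) {
  LLVMContext &Ctx = I.getContext();
  MDNode *Node = MDTuple::get(Ctx, {MDString::get(Ctx, "upgraded")});
  // "my.tag" is a hypothetical metadata kind, registered on first use.
  I.setMetadata(Ctx.getMDKindID("my.tag"), Node);
}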
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition: Module.h:115
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition: Module.h:136
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition: Module.h:118
@ Min
Takes the min of the two values, which are required to be integers.
Definition: Module.h:150
@ Max
Takes the max of the two values, which are required to be integers.
Definition: Module.h:147
bool IsNewDbgInfoFormat
Is this Module using intrinsics to record the position of debugging information, or non-intrinsic records?
Definition: Module.h:217
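A sketch of setting a module flag with one of the behaviors above; the flag name is hypothetical:

#include "llvm/IR/Module.h"

using namespace llvm;

static void setAbiVersionFlag(Module &M) {
  // With Module::Max, linking two modules keeps the larger of the two values.
  M.addModuleFlag(Module::Max, "my-abi-version", 2);
}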
A tuple of MDNodes.
Definition: Metadata.h:1737
void setOperand(unsigned I, MDNode *New)
Definition: Metadata.cpp:1433
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1425
unsigned getNumOperands() const
Definition: Metadata.cpp:1421
A container for an operand bundle being viewed as a set of values rather than a set of uses.
Definition: InstrTypes.h:1065
ArrayRef< InputTy > inputs() const
Definition: InstrTypes.h:1080
StringRef getTag() const
Definition: InstrTypes.h:1088
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1878
bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition: Regex.cpp:83
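A sketch of Regex::match with capture groups, in the style this file uses to parse mangled names; the pattern itself is illustrative:

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Regex.h"

using namespace llvm;

// Extract the element count from names like "v4i32"; Matches[0] is the whole
// match and Matches[1] the first capture group.
static bool parseElementCount(StringRef Name, StringRef &Count) {
  SmallVector<StringRef, 2> Matches;
  if (!Regex("^v([0-9]+)i32$").match(Name, &Matches))
    return false;
  Count = Matches[1];
  return true;
}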
static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition: Type.cpp:812
ArrayRef< int > getShuffleMask() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:78
void reserve(size_type N)
Definition: SmallVector.h:663
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:51
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:147
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:609
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
static constexpr size_t npos
Definition: StringRef.h:53
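A sketch of the StringRef helpers above, mirroring the prefix-stripping idiom used throughout this file (the specific prefix is illustrative):

#include "llvm/ADT/StringRef.h"

using namespace llvm;

// Strip a leading "x86." in place; returns false if the prefix is absent or
// nothing remains after it.
static bool consumeX86Prefix(StringRef &Name) {
  if (!Name.starts_with("x86."))
    return false;
  Name = Name.drop_front(4);
  return !Name.empty();
}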
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & StartsWith(StringLiteral S, T Value)
Definition: StringSwitch.h:83
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
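A minimal StringSwitch sketch in the spirit of the name matching done during upgrades; the mapping is invented for illustration:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

using namespace llvm;

enum class UpgradeKind { None, Rename, Rewrite };

static UpgradeKind classify(StringRef Name) {
  return StringSwitch<UpgradeKind>(Name)
      .Case("old.intrinsic", UpgradeKind::Rename)
      .StartsWith("legacy.", UpgradeKind::Rewrite)
      .Default(UpgradeKind::None);
}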
Class to represent struct types.
Definition: DerivedTypes.h:218
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:406
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:365
Type * getElementType(unsigned N) const
Definition: DerivedTypes.h:366
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getBFloatTy(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:145
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:267
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:225
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
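These Value methods support the standard replace-and-erase pattern used when one instruction is substituted for another; a sketch with placeholder names:

#include "llvm/IR/Instruction.h"

using namespace llvm;

static void replaceInstruction(Instruction *OldI, Instruction *NewI) {
  NewI->takeName(OldI);           // preserve the SSA name, if any
  OldI->replaceAllUsesWith(NewI); // redirect every use to the new value
  OldI->eraseFromParent();        // unlink from the block and delete
}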
Base class of all SIMD vector types.
Definition: DerivedTypes.h:427
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:691
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
AttributeMask typeIncompatible(Type *Ty, AttributeSet AS, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
Definition: Intrinsics.cpp:446
std::optional< Function * > remangleIntrinsicFunction(Function *F)
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Intrinsics.cpp:46
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
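A sketch of getOrInsertDeclaration for an overloaded intrinsic; llvm.cttz is chosen only as an example:

#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Overloaded intrinsics are mangled with the types they are instantiated at,
// so the overload type list must be supplied alongside the ID.
static Function *getCttzDecl(Module &M, Type *Ty) {
  return Intrinsic::getOrInsertDeclaration(&M, Intrinsic::cttz, {Ty});
}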
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call to the specified new function.
void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading, and returns true if it requires upgrading.
MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the function updated as well as all calls updated to the new function.
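A sketch of the usual driver loop around these hooks, similar in shape to what a bitcode reader does after parsing a module (details elided):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static void upgradeModuleIntrinsics(Module &M) {
  // UpgradeCallsToIntrinsic may erase the old function, so iterate with an
  // early-increment range (see make_early_inc_range below).
  for (Function &F : make_early_inc_range(M)) {
    Function *NewFn = nullptr;
    if (UpgradeIntrinsicFunction(&F, NewFn))
      UpgradeCallsToIntrinsic(&F);
  }
}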
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:657
bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
Op::Description Desc
void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address spaces.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:293
std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
Definition: DebugInfo.cpp:608
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction is replaced by a ptrtoint+inttoptr pair.
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C, pred), C.end());
Definition: STLExtras.h:2099
bool UpgradeDebugInfo(Module &M)
Check the debug info version number; if it is out-dated, drop the debug info.
void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to the struct-path aware TBAA format.
void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker to new module flag format.
@ DEBUG_METADATA_VERSION
Definition: Metadata.h:52
bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
Definition: Verifier.cpp:7308
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117