//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include "llvm/TargetParser/Triple.h"
#include <cstring>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
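// Illustrative example (not part of the original source): given the stale
// declaration
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// this renames it to "llvm.x86.sse41.ptestc.old" and emits the current
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)
// leaving the old call sites to be rewritten later via UpgradeIntrinsicCall.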

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
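// For instance (hand-written illustration): the immediate operand of
// llvm.x86.sse41.insertps was originally declared as i32 and is now i8, so an
// old declaration whose trailing parameter is still i32 gets renamed and
// redeclared with the current i8 immediate.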

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
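// Sketch of the transition (illustrative): llvm.x86.avx512.mask.cmp.ps.512
// used to return its comparison result as a scalar integer mask (i16); the
// current intrinsic returns <16 x i1> instead, so any declaration with a
// non-vector return type is stale.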

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
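// Both helpers cover the same migration (illustrative note): the AVX512-BF16
// intrinsics originally modeled bf16 values as i16/i32 vector elements; the
// current declarations use the dedicated bfloat type, detected above via
// isBFloatTy() on the return type or on the second parameter, respectively.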

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}
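// When this predicate returns true, the upgrade is performed entirely at the
// call sites by UpgradeIntrinsicCall: upgradeX86IntrinsicFunction() below
// reports success with NewFn left as nullptr rather than creating a
// replacement declaration.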

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_rdtscp);
    return true;
  }

  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.mask.cmp.")) {
    // Added in 7.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
             .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
             .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
             .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
             .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
             .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86MaskedFPCompare(F, ID, NewFn);
    return false; // No other 'x86.avx512.mask.cmp.*'.
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic functions. Returns true
// iff the function was upgraded.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                      F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn =
        Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .Cases("v2f32.v8i8", "v4f32.v16i8",
                                    IsArm ? Intrinsic::arm_neon_bfdot
                                          : Intrinsic::aarch64_neon_bfdot)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla", IsArm ? Intrinsic::arm_neon_bfmmla
                                   : Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb", IsArm ? Intrinsic::arm_neon_bfmlalb
                                    : Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt", IsArm ? Intrinsic::arm_neon_bfmlalt
                                    : Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

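          // Index math (explanatory note): plain vstN takes (ptr, N vectors,
          // align), i.e. N + 2 arguments, so StoreInts[fArgs.size() - 3]
          // selects vstN; the lane variants carry an extra lane operand
          // (N + 3 arguments), hence StoreLaneInts[fArgs.size() - 5] starting
          // at vst2lane.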
          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getDeclaration(F->getParent(),
                                              StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'.
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'.
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
               Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }
      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              dyn_cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC = dyn_cast<VectorType>(F->arg_begin()->getType())
                                ->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getDeclaration(F->getParent(),
                                            Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}
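// Rough sketch of the tuple upgrade (illustrative; exact old manglings vary):
// an 'aarch64.sve.tuple.get' call that extracted one subvector from a wide
// tuple register is redeclared as the generic llvm.vector.extract, overloaded
// on the result type and the tuple type; the operand rewriting itself happens
// later in UpgradeIntrinsicCall.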

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("abs."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_abs_bf16)
        .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}
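// Note: this helper reports the bfloat-based replacement ID. The old
// declarations of these NVVM intrinsics are assumed to have encoded
// bf16/bf16x2 values as i16/i32; accordingly, the caller below consults this
// table only when the declaration's return type is not already bfloat-based,
// and the calls themselves are rebuilt in UpgradeIntrinsicCall.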

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
                                          {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
                                          F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name.equals("coro.end")) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("extract.", Intrinsic::vector_extract)
                             .StartsWith("insert.", Intrinsic::vector_insert)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn =
              Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn =
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'm': {
    // Upgrade the memory intrinsics (memcpy/memmove/memset) that had an
    // alignment parameter to embed the alignment as an attribute on their
    // pointer arguments.
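    // Illustrative before/after (not from the source): the five-argument form
    //   declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
    // is replaced by the four-argument form
    //   declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
    // and the old constant alignment operand is reattached as align
    // attributes when the call sites are rewritten.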
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
                                        ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
                                            {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll}
        Expand = Name == "i" || Name == "ll";
      else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
        Expand = true;
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load.add."))
        // nvvm.atomic.load.add.{f32.p,f64.p}
        Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
      else
        Expand = false;

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() !=
              Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

  case 'r': {
    if (Name.consume_front("riscv.")) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
               .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
               .Case("aes32esi", Intrinsic::riscv_aes32esi)
               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }
      break; // No other 'riscv.*' intrinsics
    }
  } break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(
          F->getParent(), Intrinsic::var_annotation,
          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
      return true;
    }
    break;
  }

  case 'w':
    if (Name.consume_front("wasm.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
              .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn =
            Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());
        return true;
      }

      if (Name.consume_front("dot.i8x16.i7x16.")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                 .Case("add.signed",
                       Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
                 .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other 'wasm.dot.i8x16.i7x16.*'.
      }
      break; // No other 'wasm.*'.
    }
    break;

  case 'x':
    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }

  auto *ST = dyn_cast<StructType>(F->getReturnType());
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
      F->getIntrinsicID() != Intrinsic::not_intrinsic) {
    // Replace return type with literal non-packed struct. Only do this for
    // intrinsics declared to return a struct, not for intrinsics with
    // overloaded return type, in which case the exact struct type will be
    // mangled into the name.
    SmallVector<Intrinsic::IITDescriptor> Desc;
    Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
    if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
      auto *FT = F->getFunctionType();
      auto *NewST = StructType::get(ST->getContext(), ST->elements());
      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
      std::string Name = F->getName().str();
      rename(F);
      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
                               Name, F->getParent());

      // The new function may also need remangling.
      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
        NewFn = *Result;
      return true;
    }
  }

  // Remangle our intrinsic since we upgrade the mangling
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != std::nullopt) {
    NewFn = *Result;
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  NewFn = nullptr;
  bool Upgraded = upgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}
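// Typical driver loop (illustrative sketch, not part of this file): module
// loaders iterate over every function and let UpgradeCallsToIntrinsic(),
// declared alongside this API in llvm/IR/AutoUpgrade.h, rewrite the old call
// sites once an upgrade is detected, e.g.
//   for (Function &F : llvm::make_early_inc_range(M.functions()))
//     UpgradeCallsToIntrinsic(&F);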

GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getPtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
                                      Ctor->getAggregateElement(1),
                                      Constant::getNullValue(IRB.getPtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
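// Worked example (illustrative): for a 128-bit vector and Shift == 4, the loop
// produces indices [12..15, 16..27]; in the shuffle of (zero, Op) the first
// four result bytes select zeros and the rest select Op bytes 0..11, which is
// exactly a 4-byte left shift of the 16-byte lane.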

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}
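// The mirror case (illustrative): with Shift == 4 on a 128-bit vector the
// indices are [4..15, 16..19]; the shuffle operands are (Op, zero), so the low
// twelve result bytes are Op bytes 4..15 and the top four select zeros, i.e. a
// 4-byte right shift of the lane.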

static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
  llvm::VectorType *MaskTy = llvm::FixedVectorType::get(
      Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
  // i8 and we need to extract down to the right number of elements.
  if (NumElts <= 4) {
    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}
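// Example (illustrative): for an i8 mask used with a 4-element operation, the
// mask is bitcast to <8 x i1> and the shuffle above keeps only elements 0..3,
// yielding the <4 x i1> vector that the select-based masking below expects.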

static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
                            Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask,
                       cast<FixedVectorType>(Op0->getType())->getNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
                                  Value *Op0, Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
                                      Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
}

// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16;      // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(
      Op1, Op0, ArrayRef(Indices, NumElts), "palignr");

  return emitX86Select(Builder, Mask, Align, Passthru);
}
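// Illustrative trace: a 128-bit PALIGNR with an immediate of 20 first takes
// the ShiftVal > 16 branch (ShiftVal becomes 4, Op1 takes Op0's value, Op0
// becomes zero), and the final shuffle then pulls bytes 4..15 of the old Op0
// followed by four zero bytes, matching the hardware's behavior of shifting
// zeros in for large immediates.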
1605
1607 bool ZeroMask, bool IndexForm) {
1608 Type *Ty = CI.getType();
1609 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1610 unsigned EltWidth = Ty->getScalarSizeInBits();
1611 bool IsFloat = Ty->isFPOrFPVectorTy();
1612 Intrinsic::ID IID;
1613 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1614 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1615 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1616 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1617 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1618 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1619 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1620 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1621 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1622 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1623 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1624 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1625 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1626 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1627 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1628 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1629 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1630 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1631 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1632 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1633 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1634 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1635 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1636 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1637 else if (VecWidth == 128 && EltWidth == 16)
1638 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1639 else if (VecWidth == 256 && EltWidth == 16)
1640 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1641 else if (VecWidth == 512 && EltWidth == 16)
1642 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1643 else if (VecWidth == 128 && EltWidth == 8)
1644 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1645 else if (VecWidth == 256 && EltWidth == 8)
1646 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1647 else if (VecWidth == 512 && EltWidth == 8)
1648 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1649 else
1650 llvm_unreachable("Unexpected intrinsic");
1651
1652 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1653 CI.getArgOperand(2) };
1654
1655 // If this isn't index form we need to swap operand 0 and 1.
1656 if (!IndexForm)
1657 std::swap(Args[0], Args[1]);
1658
1659 Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1660 Args);
1661 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1662 : Builder.CreateBitCast(CI.getArgOperand(1),
1663 Ty);
1664 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1665}
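// Note that vpermi2var treats operand 1 as the index vector; the non-index
// (vpermt2var) forms are mapped onto it by the operand swap above, and the
// merge-masked passthru is that same index operand bitcast to the result type.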
1666
1667static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
1668 Intrinsic::ID IID) {
1669 Type *Ty = CI.getType();
1670 Value *Op0 = CI.getOperand(0);
1671 Value *Op1 = CI.getOperand(1);
1672 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1673 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1674
1675 if (CI.arg_size() == 4) { // For masked intrinsics.
1676 Value *VecSrc = CI.getOperand(2);
1677 Value *Mask = CI.getOperand(3);
1678 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1679 }
1680 return Res;
1681}
1682
1683static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
1684 bool IsRotateRight) {
1685 Type *Ty = CI.getType();
1686 Value *Src = CI.getArgOperand(0);
1687 Value *Amt = CI.getArgOperand(1);
1688
1689 // Amount may be a scalar immediate, in which case create a splat vector.
1690 // Funnel shift amounts are treated as modulo and types are all power-of-2, so
1691 // we only care about the lowest log2 bits anyway.
1692 if (Amt->getType() != Ty) {
1693 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1694 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1695 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1696 }
1697
1698 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1699 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1700 Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1701
1702 if (CI.arg_size() == 4) { // For masked intrinsics.
1703 Value *VecSrc = CI.getOperand(2);
1704 Value *Mask = CI.getOperand(3);
1705 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1706 }
1707 return Res;
1708}
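// For example, a variable left-rotate becomes a funnel shift with the source
// repeated as both value operands, roughly:
//   %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %src, <4 x i32> %src,
//                                          <4 x i32> %amt)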
1709
1710static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1711 bool IsSigned) {
1712 Type *Ty = CI.getType();
1713 Value *LHS = CI.getArgOperand(0);
1714 Value *RHS = CI.getArgOperand(1);
1715
1716 CmpInst::Predicate Pred;
1717 switch (Imm) {
1718 case 0x0:
1719 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1720 break;
1721 case 0x1:
1722 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1723 break;
1724 case 0x2:
1725 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1726 break;
1727 case 0x3:
1728 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1729 break;
1730 case 0x4:
1731 Pred = ICmpInst::ICMP_EQ;
1732 break;
1733 case 0x5:
1734 Pred = ICmpInst::ICMP_NE;
1735 break;
1736 case 0x6:
1737 return Constant::getNullValue(Ty); // FALSE
1738 case 0x7:
1739 return Constant::getAllOnesValue(Ty); // TRUE
1740 default:
1741 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1742 }
1743
1744 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1745 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1746 return Ext;
1747}
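// For example, xop.vpcomltb (Imm == 0, signed) becomes roughly:
//   %cmp = icmp slt <16 x i8> %lhs, %rhs
//   %res = sext <16 x i1> %cmp to <16 x i8>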
1748
1749static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
1750 bool IsShiftRight, bool ZeroMask) {
1751 Type *Ty = CI.getType();
1752 Value *Op0 = CI.getArgOperand(0);
1753 Value *Op1 = CI.getArgOperand(1);
1754 Value *Amt = CI.getArgOperand(2);
1755
1756 if (IsShiftRight)
1757 std::swap(Op0, Op1);
1758
1759 // Amount may be a scalar immediate, in which case create a splat vector.
1760 // Funnel shift amounts are treated as modulo and types are all power-of-2, so
1761 // we only care about the lowest log2 bits anyway.
1762 if (Amt->getType() != Ty) {
1763 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
1764 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1765 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1766 }
1767
1768 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1769 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1770 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1771
1772 unsigned NumArgs = CI.arg_size();
1773 if (NumArgs >= 4) { // For masked intrinsics.
1774 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1775 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1776 CI.getArgOperand(0);
1777 Value *Mask = CI.getOperand(NumArgs - 1);
1778 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1779 }
1780 return Res;
1781}
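// For example, vpshrd swaps its operands above so that llvm.fshr sees them in
// funnel-shift order, roughly:
//   %res = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %b, <8 x i16> %a,
//                                          <8 x i16> %amt)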
1782
1783static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
1784 Value *Mask, bool Aligned) {
1785 // Cast the pointer to the right type.
1786 Ptr = Builder.CreateBitCast(Ptr,
1787 llvm::PointerType::getUnqual(Data->getType()));
1788 const Align Alignment =
1789 Aligned
1790 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1791 : Align(1);
1792
1793 // If the mask is all ones just emit a regular store.
1794 if (const auto *C = dyn_cast<Constant>(Mask))
1795 if (C->isAllOnesValue())
1796 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1797
1798 // Convert the mask from an integer type to a vector of i1.
1799 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1800 Mask = getX86MaskVec(Builder, Mask, NumElts);
1801 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1802}
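// For example, a non-constant i8 mask over v8i64 data produces roughly:
//   call void @llvm.masked.store.v8i64.p0(<8 x i64> %data, ptr %ptr, i32 64,
//                                         <8 x i1> %mask)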
1803
1804static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
1805 Value *Passthru, Value *Mask, bool Aligned) {
1806 Type *ValTy = Passthru->getType();
1807 // Cast the pointer to the right type.
1808 Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
1809 const Align Alignment =
1810 Aligned
1811 ? Align(
1812 Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
1813 8)
1814 : Align(1);
1815
1816 // If the mask is all ones just emit a regular load.
1817 if (const auto *C = dyn_cast<Constant>(Mask))
1818 if (C->isAllOnesValue())
1819 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1820
1821 // Convert the mask from an integer type to a vector of i1.
1822 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1823 Mask = getX86MaskVec(Builder, Mask, NumElts);
1824 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1825}
1826
1827static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1828 Type *Ty = CI.getType();
1829 Value *Op0 = CI.getArgOperand(0);
1830 Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1831 Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1832 if (CI.arg_size() == 3)
1833 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1834 return Res;
1835}
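// For example, ssse3.pabs.b.128 becomes roughly:
//   %res = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %x, i1 false)
// where the i1 false means INT_MIN is not treated as poison.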
1836
1837static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1838 Type *Ty = CI.getType();
1839
1840 // Arguments have a vXi32 type so cast to vXi64.
1841 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1842 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1843
1844 if (IsSigned) {
1845 // Shift left then arithmetic shift right.
1846 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1847 LHS = Builder.CreateShl(LHS, ShiftAmt);
1848 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1849 RHS = Builder.CreateShl(RHS, ShiftAmt);
1850 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1851 } else {
1852 // Clear the upper bits.
1853 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1854 LHS = Builder.CreateAnd(LHS, Mask);
1855 RHS = Builder.CreateAnd(RHS, Mask);
1856 }
1857
1858 Value *Res = Builder.CreateMul(LHS, RHS);
1859
1860 if (CI.arg_size() == 4)
1861 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1862
1863 return Res;
1864}
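// For example, the signed (pmuldq) case sign-extends the low 32 bits of each
// 64-bit lane before the full-width multiply, roughly:
//   %l0 = shl <2 x i64> %a, <i64 32, i64 32>
//   %l1 = ashr <2 x i64> %l0, <i64 32, i64 32>   ; likewise for %b, then mul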
1865
1866// Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
1867static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
1868 Value *Mask) {
1869 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1870 if (Mask) {
1871 const auto *C = dyn_cast<Constant>(Mask);
1872 if (!C || !C->isAllOnesValue())
1873 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1874 }
1875
1876 if (NumElts < 8) {
1877 int Indices[8];
1878 for (unsigned i = 0; i != NumElts; ++i)
1879 Indices[i] = i;
1880 for (unsigned i = NumElts; i != 8; ++i)
1881 Indices[i] = NumElts + i % NumElts;
1882 Vec = Builder.CreateShuffleVector(Vec,
1883 Constant::getNullValue(Vec->getType()),
1884 Indices);
1885 }
1886 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1887}
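// For example, a <4 x i1> compare result is padded with zero elements up to
// <8 x i1> so it can be bitcast to i8, the narrowest x86 mask type.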
1888
1889static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
1890 unsigned CC, bool Signed) {
1891 Value *Op0 = CI.getArgOperand(0);
1892 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1893
1894 Value *Cmp;
1895 if (CC == 3) {
1896 Cmp = Constant::getNullValue(
1897 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1898 } else if (CC == 7) {
1899 Cmp = Constant::getAllOnesValue(
1900 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1901 } else {
1902 ICmpInst::Predicate Pred;
1903 switch (CC) {
1904 default: llvm_unreachable("Unknown condition code");
1905 case 0: Pred = ICmpInst::ICMP_EQ; break;
1906 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1907 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1908 case 4: Pred = ICmpInst::ICMP_NE; break;
1909 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1910 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1911 }
1912 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1913 }
1914
1915 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1916
1917 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1918}
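// For example, avx512.mask.pcmpgt.d.128 comes through here with CC == 6 and
// becomes an icmp sgt, an and with the incoming k-mask, and a bitcast of the
// padded <8 x i1> result back to i8.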
1919
1920// Replace a masked intrinsic with an older unmasked intrinsic.
1921static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
1922 Intrinsic::ID IID) {
1923 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1924 Value *Rep = Builder.CreateCall(Intrin,
1925 { CI.getArgOperand(0), CI.getArgOperand(1) });
1926 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1927}
1928
1929static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
1930 Value* A = CI.getArgOperand(0);
1931 Value* B = CI.getArgOperand(1);
1932 Value* Src = CI.getArgOperand(2);
1933 Value* Mask = CI.getArgOperand(3);
1934
1935 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1936 Value* Cmp = Builder.CreateIsNotNull(AndNode);
1937 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1938 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1939 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
1940 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1941}
1942
1943static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
1944 Value* Op = CI.getArgOperand(0);
1945 Type* ReturnOp = CI.getType();
1946 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
1947 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1948 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1949}
1950
1951// Replace intrinsic with unmasked version and a select.
1952static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1953 CallBase &CI, Value *&Rep) {
1954 Name = Name.substr(12); // Remove avx512.mask.
1955
1956 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1957 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1958 Intrinsic::ID IID;
1959 if (Name.starts_with("max.p")) {
1960 if (VecWidth == 128 && EltWidth == 32)
1961 IID = Intrinsic::x86_sse_max_ps;
1962 else if (VecWidth == 128 && EltWidth == 64)
1963 IID = Intrinsic::x86_sse2_max_pd;
1964 else if (VecWidth == 256 && EltWidth == 32)
1965 IID = Intrinsic::x86_avx_max_ps_256;
1966 else if (VecWidth == 256 && EltWidth == 64)
1967 IID = Intrinsic::x86_avx_max_pd_256;
1968 else
1969 llvm_unreachable("Unexpected intrinsic");
1970 } else if (Name.starts_with("min.p")) {
1971 if (VecWidth == 128 && EltWidth == 32)
1972 IID = Intrinsic::x86_sse_min_ps;
1973 else if (VecWidth == 128 && EltWidth == 64)
1974 IID = Intrinsic::x86_sse2_min_pd;
1975 else if (VecWidth == 256 && EltWidth == 32)
1976 IID = Intrinsic::x86_avx_min_ps_256;
1977 else if (VecWidth == 256 && EltWidth == 64)
1978 IID = Intrinsic::x86_avx_min_pd_256;
1979 else
1980 llvm_unreachable("Unexpected intrinsic");
1981 } else if (Name.starts_with("pshuf.b.")) {
1982 if (VecWidth == 128)
1983 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1984 else if (VecWidth == 256)
1985 IID = Intrinsic::x86_avx2_pshuf_b;
1986 else if (VecWidth == 512)
1987 IID = Intrinsic::x86_avx512_pshuf_b_512;
1988 else
1989 llvm_unreachable("Unexpected intrinsic");
1990 } else if (Name.starts_with("pmul.hr.sw.")) {
1991 if (VecWidth == 128)
1992 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1993 else if (VecWidth == 256)
1994 IID = Intrinsic::x86_avx2_pmul_hr_sw;
1995 else if (VecWidth == 512)
1996 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1997 else
1998 llvm_unreachable("Unexpected intrinsic");
1999 } else if (Name.starts_with("pmulh.w.")) {
2000 if (VecWidth == 128)
2001 IID = Intrinsic::x86_sse2_pmulh_w;
2002 else if (VecWidth == 256)
2003 IID = Intrinsic::x86_avx2_pmulh_w;
2004 else if (VecWidth == 512)
2005 IID = Intrinsic::x86_avx512_pmulh_w_512;
2006 else
2007 llvm_unreachable("Unexpected intrinsic");
2008 } else if (Name.starts_with("pmulhu.w.")) {
2009 if (VecWidth == 128)
2010 IID = Intrinsic::x86_sse2_pmulhu_w;
2011 else if (VecWidth == 256)
2012 IID = Intrinsic::x86_avx2_pmulhu_w;
2013 else if (VecWidth == 512)
2014 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2015 else
2016 llvm_unreachable("Unexpected intrinsic");
2017 } else if (Name.starts_with("pmaddw.d.")) {
2018 if (VecWidth == 128)
2019 IID = Intrinsic::x86_sse2_pmadd_wd;
2020 else if (VecWidth == 256)
2021 IID = Intrinsic::x86_avx2_pmadd_wd;
2022 else if (VecWidth == 512)
2023 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2024 else
2025 llvm_unreachable("Unexpected intrinsic");
2026 } else if (Name.starts_with("pmaddubs.w.")) {
2027 if (VecWidth == 128)
2028 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2029 else if (VecWidth == 256)
2030 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2031 else if (VecWidth == 512)
2032 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2033 else
2034 llvm_unreachable("Unexpected intrinsic");
2035 } else if (Name.starts_with("packsswb.")) {
2036 if (VecWidth == 128)
2037 IID = Intrinsic::x86_sse2_packsswb_128;
2038 else if (VecWidth == 256)
2039 IID = Intrinsic::x86_avx2_packsswb;
2040 else if (VecWidth == 512)
2041 IID = Intrinsic::x86_avx512_packsswb_512;
2042 else
2043 llvm_unreachable("Unexpected intrinsic");
2044 } else if (Name.starts_with("packssdw.")) {
2045 if (VecWidth == 128)
2046 IID = Intrinsic::x86_sse2_packssdw_128;
2047 else if (VecWidth == 256)
2048 IID = Intrinsic::x86_avx2_packssdw;
2049 else if (VecWidth == 512)
2050 IID = Intrinsic::x86_avx512_packssdw_512;
2051 else
2052 llvm_unreachable("Unexpected intrinsic");
2053 } else if (Name.starts_with("packuswb.")) {
2054 if (VecWidth == 128)
2055 IID = Intrinsic::x86_sse2_packuswb_128;
2056 else if (VecWidth == 256)
2057 IID = Intrinsic::x86_avx2_packuswb;
2058 else if (VecWidth == 512)
2059 IID = Intrinsic::x86_avx512_packuswb_512;
2060 else
2061 llvm_unreachable("Unexpected intrinsic");
2062 } else if (Name.starts_with("packusdw.")) {
2063 if (VecWidth == 128)
2064 IID = Intrinsic::x86_sse41_packusdw;
2065 else if (VecWidth == 256)
2066 IID = Intrinsic::x86_avx2_packusdw;
2067 else if (VecWidth == 512)
2068 IID = Intrinsic::x86_avx512_packusdw_512;
2069 else
2070 llvm_unreachable("Unexpected intrinsic");
2071 } else if (Name.starts_with("vpermilvar.")) {
2072 if (VecWidth == 128 && EltWidth == 32)
2073 IID = Intrinsic::x86_avx_vpermilvar_ps;
2074 else if (VecWidth == 128 && EltWidth == 64)
2075 IID = Intrinsic::x86_avx_vpermilvar_pd;
2076 else if (VecWidth == 256 && EltWidth == 32)
2077 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2078 else if (VecWidth == 256 && EltWidth == 64)
2079 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2080 else if (VecWidth == 512 && EltWidth == 32)
2081 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2082 else if (VecWidth == 512 && EltWidth == 64)
2083 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2084 else
2085 llvm_unreachable("Unexpected intrinsic");
2086 } else if (Name == "cvtpd2dq.256") {
2087 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2088 } else if (Name == "cvtpd2ps.256") {
2089 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2090 } else if (Name == "cvttpd2dq.256") {
2091 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2092 } else if (Name == "cvttps2dq.128") {
2093 IID = Intrinsic::x86_sse2_cvttps2dq;
2094 } else if (Name == "cvttps2dq.256") {
2095 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2096 } else if (Name.starts_with("permvar.")) {
2097 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2098 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2099 IID = Intrinsic::x86_avx2_permps;
2100 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2101 IID = Intrinsic::x86_avx2_permd;
2102 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2103 IID = Intrinsic::x86_avx512_permvar_df_256;
2104 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2105 IID = Intrinsic::x86_avx512_permvar_di_256;
2106 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2107 IID = Intrinsic::x86_avx512_permvar_sf_512;
2108 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2109 IID = Intrinsic::x86_avx512_permvar_si_512;
2110 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2111 IID = Intrinsic::x86_avx512_permvar_df_512;
2112 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2113 IID = Intrinsic::x86_avx512_permvar_di_512;
2114 else if (VecWidth == 128 && EltWidth == 16)
2115 IID = Intrinsic::x86_avx512_permvar_hi_128;
2116 else if (VecWidth == 256 && EltWidth == 16)
2117 IID = Intrinsic::x86_avx512_permvar_hi_256;
2118 else if (VecWidth == 512 && EltWidth == 16)
2119 IID = Intrinsic::x86_avx512_permvar_hi_512;
2120 else if (VecWidth == 128 && EltWidth == 8)
2121 IID = Intrinsic::x86_avx512_permvar_qi_128;
2122 else if (VecWidth == 256 && EltWidth == 8)
2123 IID = Intrinsic::x86_avx512_permvar_qi_256;
2124 else if (VecWidth == 512 && EltWidth == 8)
2125 IID = Intrinsic::x86_avx512_permvar_qi_512;
2126 else
2127 llvm_unreachable("Unexpected intrinsic");
2128 } else if (Name.starts_with("dbpsadbw.")) {
2129 if (VecWidth == 128)
2130 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2131 else if (VecWidth == 256)
2132 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2133 else if (VecWidth == 512)
2134 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2135 else
2136 llvm_unreachable("Unexpected intrinsic");
2137 } else if (Name.starts_with("pmultishift.qb.")) {
2138 if (VecWidth == 128)
2139 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2140 else if (VecWidth == 256)
2141 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2142 else if (VecWidth == 512)
2143 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2144 else
2145 llvm_unreachable("Unexpected intrinsic");
2146 } else if (Name.starts_with("conflict.")) {
2147 if (Name[9] == 'd' && VecWidth == 128)
2148 IID = Intrinsic::x86_avx512_conflict_d_128;
2149 else if (Name[9] == 'd' && VecWidth == 256)
2150 IID = Intrinsic::x86_avx512_conflict_d_256;
2151 else if (Name[9] == 'd' && VecWidth == 512)
2152 IID = Intrinsic::x86_avx512_conflict_d_512;
2153 else if (Name[9] == 'q' && VecWidth == 128)
2154 IID = Intrinsic::x86_avx512_conflict_q_128;
2155 else if (Name[9] == 'q' && VecWidth == 256)
2156 IID = Intrinsic::x86_avx512_conflict_q_256;
2157 else if (Name[9] == 'q' && VecWidth == 512)
2158 IID = Intrinsic::x86_avx512_conflict_q_512;
2159 else
2160 llvm_unreachable("Unexpected intrinsic");
2161 } else if (Name.starts_with("pavg.")) {
2162 if (Name[5] == 'b' && VecWidth == 128)
2163 IID = Intrinsic::x86_sse2_pavg_b;
2164 else if (Name[5] == 'b' && VecWidth == 256)
2165 IID = Intrinsic::x86_avx2_pavg_b;
2166 else if (Name[5] == 'b' && VecWidth == 512)
2167 IID = Intrinsic::x86_avx512_pavg_b_512;
2168 else if (Name[5] == 'w' && VecWidth == 128)
2169 IID = Intrinsic::x86_sse2_pavg_w;
2170 else if (Name[5] == 'w' && VecWidth == 256)
2171 IID = Intrinsic::x86_avx2_pavg_w;
2172 else if (Name[5] == 'w' && VecWidth == 512)
2173 IID = Intrinsic::x86_avx512_pavg_w_512;
2174 else
2175 llvm_unreachable("Unexpected intrinsic");
2176 } else
2177 return false;
2178
2179 SmallVector<Value *, 4> Args(CI.args());
2180 Args.pop_back();
2181 Args.pop_back();
2182 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
2183 Args);
2184 unsigned NumArgs = CI.arg_size();
2185 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2186 CI.getArgOperand(NumArgs - 2));
2187 return true;
2188}
2189
2190/// Upgrade the comment in a call to inline asm that represents an ObjC
2191/// retain/release marker.
2192void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2193 size_t Pos;
2194 if (AsmStr->find("mov\tfp") == 0 &&
2195 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2196 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2197 AsmStr->replace(Pos, 1, ";");
2198 }
2199}
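// For example, a marker string such as "mov\tfp, fp\t\t# marker for
// objc_retainAutoreleaseReturnValue" has its '#' rewritten to ';'; nothing
// else in the string changes.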
2200
2201static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2202 IRBuilder<> &Builder) {
2203 if (Name == "mve.vctp64.old") {
2204 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
2205 // correct type.
2206 Value *VCTP = Builder.CreateCall(
2207 Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
2208 CI->getArgOperand(0), CI->getName());
2209 Value *C1 = Builder.CreateCall(
2210 Intrinsic::getDeclaration(
2211 F->getParent(), Intrinsic::arm_mve_pred_v2i,
2212 {VectorType::get(Builder.getInt1Ty(), 2, false)}),
2213 VCTP);
2214 return Builder.CreateCall(
2215 Intrinsic::getDeclaration(
2216 F->getParent(), Intrinsic::arm_mve_pred_i2v,
2217 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2218 C1);
2219 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
2220 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
2221 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
2222 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
2223 Name ==
2224 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
2225 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
2226 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
2227 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
2228 Name ==
2229 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
2230 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
2231 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
2232 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
2233 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
2234 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
2235 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
2236 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
2237 std::vector<Type *> Tys;
2238 unsigned ID = CI->getIntrinsicID();
2239 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
2240 switch (ID) {
2241 case Intrinsic::arm_mve_mull_int_predicated:
2242 case Intrinsic::arm_mve_vqdmull_predicated:
2243 case Intrinsic::arm_mve_vldr_gather_base_predicated:
2244 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
2245 break;
2246 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
2247 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
2248 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
2249 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
2250 V2I1Ty};
2251 break;
2252 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
2253 Tys = {CI->getType(), CI->getOperand(0)->getType(),
2254 CI->getOperand(1)->getType(), V2I1Ty};
2255 break;
2256 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
2257 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
2258 CI->getOperand(2)->getType(), V2I1Ty};
2259 break;
2260 case Intrinsic::arm_cde_vcx1q_predicated:
2261 case Intrinsic::arm_cde_vcx1qa_predicated:
2262 case Intrinsic::arm_cde_vcx2q_predicated:
2263 case Intrinsic::arm_cde_vcx2qa_predicated:
2264 case Intrinsic::arm_cde_vcx3q_predicated:
2265 case Intrinsic::arm_cde_vcx3qa_predicated:
2266 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
2267 break;
2268 default:
2269 llvm_unreachable("Unhandled Intrinsic!");
2270 }
2271
2272 std::vector<Value *> Ops;
2273 for (Value *Op : CI->args()) {
2274 Type *Ty = Op->getType();
2275 if (Ty->getScalarSizeInBits() == 1) {
2276 Value *C1 = Builder.CreateCall(
2277 Intrinsic::getDeclaration(
2278 F->getParent(), Intrinsic::arm_mve_pred_v2i,
2279 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
2280 Op);
2281 Op = Builder.CreateCall(
2282 Intrinsic::getDeclaration(F->getParent(),
2283 Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
2284 C1);
2285 }
2286 Ops.push_back(Op);
2287 }
2288
2289 Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
2290 return Builder.CreateCall(Fn, Ops, CI->getName());
2291 }
2292 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
2293}
2294
2295static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
2296 Function *F, IRBuilder<> &Builder) {
2297 const bool IsInc = Name.starts_with("atomic.inc.");
2298 if (IsInc || Name.starts_with("atomic.dec.")) {
2299 if (CI->getNumOperands() != 6) // Malformed bitcode.
2300 return nullptr;
2301
2302 AtomicRMWInst::BinOp RMWOp =
2303 IsInc ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
2304
2305 Value *Ptr = CI->getArgOperand(0);
2306 Value *Val = CI->getArgOperand(1);
2307 ConstantInt *OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
2308 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
2309
2310 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
2311 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
2312 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
2313 if (Order == AtomicOrdering::NotAtomic ||
2314 Order == AtomicOrdering::Unordered)
2315 Order = AtomicOrdering::SequentiallyConsistent;
2316
2317 // The scope argument never really worked correctly. Use agent as the most
2318 // conservative option which should still always produce the instruction.
2319 SyncScope::ID SSID = F->getContext().getOrInsertSyncScopeID("agent");
2320 AtomicRMWInst *RMW =
2321 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
2322
2323 if (!VolatileArg || !VolatileArg->isZero())
2324 RMW->setVolatile(true);
2325 return RMW;
2326 }
2327
2328 llvm_unreachable("Unknown function for AMDGPU intrinsic upgrade.");
2329}
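// For example, llvm.amdgcn.atomic.inc over an i32 typically becomes roughly:
//   %r = atomicrmw volatile uinc_wrap ptr %p, i32 %v syncscope("agent") seq_cst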
2330
2331/// Upgrade a call to an old intrinsic. All argument and return casting must be
2332/// provided to seamlessly integrate with existing context.
2333void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
2334 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
2335 // checks the callee's function type matches. It's likely we need to handle
2336 // type changes here.
2337 Function *F = dyn_cast<Function>(CI->getCalledOperand());
2338 if (!F)
2339 return;
2340
2341 LLVMContext &C = CI->getContext();
2342 IRBuilder<> Builder(C);
2343 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
2344
2345 if (!NewFn) {
2346 // Get the Function's name.
2347 StringRef Name = F->getName();
2348
2349 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
2350 Name = Name.substr(5);
2351
2352 bool IsX86 = Name.consume_front("x86.");
2353 bool IsNVVM = Name.consume_front("nvvm.");
2354 bool IsARM = Name.consume_front("arm.");
2355 bool IsAMDGCN = Name.consume_front("amdgcn.");
2356
2357 if (IsX86 && Name.starts_with("sse4a.movnt.")) {
2358 SmallVector<Metadata *, 1> Elts;
2359 Elts.push_back(
2360 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2361 MDNode *Node = MDNode::get(C, Elts);
2362
2363 Value *Arg0 = CI->getArgOperand(0);
2364 Value *Arg1 = CI->getArgOperand(1);
2365
2366 // Nontemporal (unaligned) store of the 0'th element of the float/double
2367 // vector.
2368 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2369 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2370 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2371 Value *Extract =
2372 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2373
2374 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2375 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2376
2377 // Remove intrinsic.
2378 CI->eraseFromParent();
2379 return;
2380 }
2381
2382 if (IsX86 && (Name.starts_with("avx.movnt.") ||
2383 Name.starts_with("avx512.storent."))) {
2384 SmallVector<Metadata *, 1> Elts;
2385 Elts.push_back(
2386 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2387 MDNode *Node = MDNode::get(C, Elts);
2388
2389 Value *Arg0 = CI->getArgOperand(0);
2390 Value *Arg1 = CI->getArgOperand(1);
2391
2392 // Convert the type of the pointer to a pointer to the stored type.
2393 Value *BC = Builder.CreateBitCast(Arg0,
2394 PointerType::getUnqual(Arg1->getType()),
2395 "cast");
2396 StoreInst *SI = Builder.CreateAlignedStore(
2397 Arg1, BC,
2398 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2399 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2400
2401 // Remove intrinsic.
2402 CI->eraseFromParent();
2403 return;
2404 }
2405
2406 if (IsX86 && Name == "sse2.storel.dq") {
2407 Value *Arg0 = CI->getArgOperand(0);
2408 Value *Arg1 = CI->getArgOperand(1);
2409
2410 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2411 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2412 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2413 Value *BC = Builder.CreateBitCast(Arg0,
2414 PointerType::getUnqual(Elt->getType()),
2415 "cast");
2416 Builder.CreateAlignedStore(Elt, BC, Align(1));
2417
2418 // Remove intrinsic.
2419 CI->eraseFromParent();
2420 return;
2421 }
2422
2423 if (IsX86 && (Name.starts_with("sse.storeu.") ||
2424 Name.starts_with("sse2.storeu.") ||
2425 Name.starts_with("avx.storeu."))) {
2426 Value *Arg0 = CI->getArgOperand(0);
2427 Value *Arg1 = CI->getArgOperand(1);
2428
2429 Arg0 = Builder.CreateBitCast(Arg0,
2430 PointerType::getUnqual(Arg1->getType()),
2431 "cast");
2432 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2433
2434 // Remove intrinsic.
2435 CI->eraseFromParent();
2436 return;
2437 }
2438
2439 if (IsX86 && Name == "avx512.mask.store.ss") {
2440 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2441 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2442 Mask, false);
2443
2444 // Remove intrinsic.
2445 CI->eraseFromParent();
2446 return;
2447 }
2448
2449 if (IsX86 && Name.starts_with("avx512.mask.store")) {
2450 // "avx512.mask.storeu." or "avx512.mask.store."
2451 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2452 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2453 CI->getArgOperand(2), Aligned);
2454
2455 // Remove intrinsic.
2456 CI->eraseFromParent();
2457 return;
2458 }
2459
2460 Value *Rep;
2461 // Upgrade packed integer vector compare intrinsics to compare instructions.
2462 if (IsX86 && (Name.starts_with("sse2.pcmp") ||
2463 Name.starts_with("avx2.pcmp"))) {
2464 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2465 bool CmpEq = Name[9] == 'e';
2466 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2467 CI->getArgOperand(0), CI->getArgOperand(1));
2468 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2469 } else if (IsX86 && (Name.starts_with("avx512.broadcastm"))) {
2470 Type *ExtTy = Type::getInt32Ty(C);
2471 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2472 ExtTy = Type::getInt64Ty(C);
2473 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2474 ExtTy->getPrimitiveSizeInBits();
2475 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2476 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2477 } else if (IsX86 && (Name == "sse.sqrt.ss" ||
2478 Name == "sse2.sqrt.sd")) {
2479 Value *Vec = CI->getArgOperand(0);
2480 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2481 Function *Intr = Intrinsic::getDeclaration(F->getParent(),
2482 Intrinsic::sqrt, Elt0->getType());
2483 Elt0 = Builder.CreateCall(Intr, Elt0);
2484 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2485 } else if (IsX86 && (Name.starts_with("avx.sqrt.p") ||
2486 Name.starts_with("sse2.sqrt.p") ||
2487 Name.starts_with("sse.sqrt.p"))) {
2488 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2489 Intrinsic::sqrt,
2490 CI->getType()),
2491 {CI->getArgOperand(0)});
2492 } else if (IsX86 && (Name.starts_with("avx512.mask.sqrt.p"))) {
2493 if (CI->arg_size() == 4 &&
2494 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2495 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2496 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2497 : Intrinsic::x86_avx512_sqrt_pd_512;
2498
2499 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
2500 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
2501 IID), Args);
2502 } else {
2503 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2504 Intrinsic::sqrt,
2505 CI->getType()),
2506 {CI->getArgOperand(0)});
2507 }
2508 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2509 CI->getArgOperand(1));
2510 } else if (IsX86 && (Name.starts_with("avx512.ptestm") ||
2511 Name.starts_with("avx512.ptestnm"))) {
2512 Value *Op0 = CI->getArgOperand(0);
2513 Value *Op1 = CI->getArgOperand(1);
2514 Value *Mask = CI->getArgOperand(2);
2515 Rep = Builder.CreateAnd(Op0, Op1);
2516 llvm::Type *Ty = Op0->getType();
2517 Constant *Zero = llvm::Constant::getNullValue(Ty);
2518 ICmpInst::Predicate Pred =
2519 Name.starts_with("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2520 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2521 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2522 } else if (IsX86 && (Name.starts_with("avx512.mask.pbroadcast"))){
2523 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2524 ->getNumElements();
2525 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2526 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2527 CI->getArgOperand(1));
2528 } else if (IsX86 && (Name.starts_with("avx512.kunpck"))) {
2529 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2530 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2531 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2532 int Indices[64];
2533 for (unsigned i = 0; i != NumElts; ++i)
2534 Indices[i] = i;
2535
2536 // First extract half of each vector. This gives better codegen than
2537 // doing it in a single shuffle.
2538 LHS =
2539 Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2540 RHS =
2541 Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2542 // Concat the vectors.
2543 // NOTE: Operands have to be swapped to match intrinsic definition.
2544 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2545 Rep = Builder.CreateBitCast(Rep, CI->getType());
2546 } else if (IsX86 && Name == "avx512.kand.w") {
2547 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2548 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2549 Rep = Builder.CreateAnd(LHS, RHS);
2550 Rep = Builder.CreateBitCast(Rep, CI->getType());
2551 } else if (IsX86 && Name == "avx512.kandn.w") {
2552 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2553 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2554 LHS = Builder.CreateNot(LHS);
2555 Rep = Builder.CreateAnd(LHS, RHS);
2556 Rep = Builder.CreateBitCast(Rep, CI->getType());
2557 } else if (IsX86 && Name == "avx512.kor.w") {
2558 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2559 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2560 Rep = Builder.CreateOr(LHS, RHS);
2561 Rep = Builder.CreateBitCast(Rep, CI->getType());
2562 } else if (IsX86 && Name == "avx512.kxor.w") {
2563 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2564 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2565 Rep = Builder.CreateXor(LHS, RHS);
2566 Rep = Builder.CreateBitCast(Rep, CI->getType());
2567 } else if (IsX86 && Name == "avx512.kxnor.w") {
2568 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2569 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2570 LHS = Builder.CreateNot(LHS);
2571 Rep = Builder.CreateXor(LHS, RHS);
2572 Rep = Builder.CreateBitCast(Rep, CI->getType());
2573 } else if (IsX86 && Name == "avx512.knot.w") {
2574 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2575 Rep = Builder.CreateNot(Rep);
2576 Rep = Builder.CreateBitCast(Rep, CI->getType());
2577 } else if (IsX86 &&
2578 (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
2579 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2580 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2581 Rep = Builder.CreateOr(LHS, RHS);
2582 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2583 Value *C;
2584 if (Name[14] == 'c')
2585 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2586 else
2587 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2588 Rep = Builder.CreateICmpEQ(Rep, C);
2589 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2590 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2591 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2592 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2593 Name == "sse.div.ss" || Name == "sse2.div.sd")) {
2594 Type *I32Ty = Type::getInt32Ty(C);
2595 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2596 ConstantInt::get(I32Ty, 0));
2597 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2598 ConstantInt::get(I32Ty, 0));
2599 Value *EltOp;
2600 if (Name.contains(".add."))
2601 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2602 else if (Name.contains(".sub."))
2603 EltOp = Builder.CreateFSub(Elt0, Elt1);
2604 else if (Name.contains(".mul."))
2605 EltOp = Builder.CreateFMul(Elt0, Elt1);
2606 else
2607 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2608 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2609 ConstantInt::get(I32Ty, 0));
2610 } else if (IsX86 && Name.starts_with("avx512.mask.pcmp")) {
2611 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2612 bool CmpEq = Name[16] == 'e';
2613 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2614 } else if (IsX86 && Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2615 Type *OpTy = CI->getArgOperand(0)->getType();
2616 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2617 Intrinsic::ID IID;
2618 switch (VecWidth) {
2619 default: llvm_unreachable("Unexpected intrinsic");
2620 case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
2621 case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
2622 case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
2623 }
2624
2625 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2626 { CI->getOperand(0), CI->getArgOperand(1) });
2627 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2628 } else if (IsX86 && Name.starts_with("avx512.mask.fpclass.p")) {
2629 Type *OpTy = CI->getArgOperand(0)->getType();
2630 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2631 unsigned EltWidth = OpTy->getScalarSizeInBits();
2632 Intrinsic::ID IID;
2633 if (VecWidth == 128 && EltWidth == 32)
2634 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2635 else if (VecWidth == 256 && EltWidth == 32)
2636 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2637 else if (VecWidth == 512 && EltWidth == 32)
2638 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2639 else if (VecWidth == 128 && EltWidth == 64)
2640 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2641 else if (VecWidth == 256 && EltWidth == 64)
2642 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2643 else if (VecWidth == 512 && EltWidth == 64)
2644 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2645 else
2646 llvm_unreachable("Unexpected intrinsic");
2647
2648 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2649 { CI->getOperand(0), CI->getArgOperand(1) });
2650 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2651 } else if (IsX86 && Name.starts_with("avx512.cmp.p")) {
2652 SmallVector<Value *, 4> Args(CI->args());
2653 Type *OpTy = Args[0]->getType();
2654 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2655 unsigned EltWidth = OpTy->getScalarSizeInBits();
2656 Intrinsic::ID IID;
2657 if (VecWidth == 128 && EltWidth == 32)
2658 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2659 else if (VecWidth == 256 && EltWidth == 32)
2660 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2661 else if (VecWidth == 512 && EltWidth == 32)
2662 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2663 else if (VecWidth == 128 && EltWidth == 64)
2664 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2665 else if (VecWidth == 256 && EltWidth == 64)
2666 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2667 else if (VecWidth == 512 && EltWidth == 64)
2668 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2669 else
2670 llvm_unreachable("Unexpected intrinsic");
2671
2672 Value *Mask = Constant::getAllOnesValue(CI->getType());
2673 if (VecWidth == 512)
2674 std::swap(Mask, Args.back());
2675 Args.push_back(Mask);
2676
2677 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2678 Args);
2679 } else if (IsX86 && Name.starts_with("avx512.mask.cmp.")) {
2680 // Integer compare intrinsics.
2681 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2682 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2683 } else if (IsX86 && Name.starts_with("avx512.mask.ucmp.")) {
2684 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2685 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2686 } else if (IsX86 && (Name.starts_with("avx512.cvtb2mask.") ||
2687 Name.starts_with("avx512.cvtw2mask.") ||
2688 Name.starts_with("avx512.cvtd2mask.") ||
2689 Name.starts_with("avx512.cvtq2mask."))) {
2690 Value *Op = CI->getArgOperand(0);
2691 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2692 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2693 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2694 } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
2695 Name == "ssse3.pabs.w.128" ||
2696 Name == "ssse3.pabs.d.128" ||
2697 Name.starts_with("avx2.pabs") ||
2698 Name.starts_with("avx512.mask.pabs"))) {
2699 Rep = upgradeAbs(Builder, *CI);
2700 } else if (IsX86 && (Name == "sse41.pmaxsb" ||
2701 Name == "sse2.pmaxs.w" ||
2702 Name == "sse41.pmaxsd" ||
2703 Name.starts_with("avx2.pmaxs") ||
2704 Name.starts_with("avx512.mask.pmaxs"))) {
2705 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2706 } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
2707 Name == "sse41.pmaxuw" ||
2708 Name == "sse41.pmaxud" ||
2709 Name.starts_with("avx2.pmaxu") ||
2710 Name.starts_with("avx512.mask.pmaxu"))) {
2711 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2712 } else if (IsX86 && (Name == "sse41.pminsb" ||
2713 Name == "sse2.pmins.w" ||
2714 Name == "sse41.pminsd" ||
2715 Name.starts_with("avx2.pmins") ||
2716 Name.starts_with("avx512.mask.pmins"))) {
2717 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2718 } else if (IsX86 && (Name == "sse2.pminu.b" ||
2719 Name == "sse41.pminuw" ||
2720 Name == "sse41.pminud" ||
2721 Name.starts_with("avx2.pminu") ||
2722 Name.starts_with("avx512.mask.pminu"))) {
2723 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2724 } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2725 Name == "avx2.pmulu.dq" ||
2726 Name == "avx512.pmulu.dq.512" ||
2727 Name.starts_with("avx512.mask.pmulu.dq."))) {
2728 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2729 } else if (IsX86 && (Name == "sse41.pmuldq" ||
2730 Name == "avx2.pmul.dq" ||
2731 Name == "avx512.pmul.dq.512" ||
2732 Name.starts_with("avx512.mask.pmul.dq."))) {
2733 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2734 } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2735 Name == "sse2.cvtsi2sd" ||
2736 Name == "sse.cvtsi642ss" ||
2737 Name == "sse2.cvtsi642sd")) {
2738 Rep = Builder.CreateSIToFP(
2739 CI->getArgOperand(1),
2740 cast<VectorType>(CI->getType())->getElementType());
2741 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2742 } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2743 Rep = Builder.CreateUIToFP(
2744 CI->getArgOperand(1),
2745 cast<VectorType>(CI->getType())->getElementType());
2746 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2747 } else if (IsX86 && Name == "sse2.cvtss2sd") {
2748 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2749 Rep = Builder.CreateFPExt(
2750 Rep, cast<VectorType>(CI->getType())->getElementType());
2751 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2752 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2753 Name == "sse2.cvtdq2ps" ||
2754 Name == "avx.cvtdq2.pd.256" ||
2755 Name == "avx.cvtdq2.ps.256" ||
2756 Name.starts_with("avx512.mask.cvtdq2pd.") ||
2757 Name.starts_with("avx512.mask.cvtudq2pd.") ||
2758 Name.starts_with("avx512.mask.cvtdq2ps.") ||
2759 Name.starts_with("avx512.mask.cvtudq2ps.") ||
2760 Name.starts_with("avx512.mask.cvtqq2pd.") ||
2761 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2762 Name == "avx512.mask.cvtqq2ps.256" ||
2763 Name == "avx512.mask.cvtqq2ps.512" ||
2764 Name == "avx512.mask.cvtuqq2ps.256" ||
2765 Name == "avx512.mask.cvtuqq2ps.512" ||
2766 Name == "sse2.cvtps2pd" ||
2767 Name == "avx.cvt.ps2.pd.256" ||
2768 Name == "avx512.mask.cvtps2pd.128" ||
2769 Name == "avx512.mask.cvtps2pd.256")) {
2770 auto *DstTy = cast<FixedVectorType>(CI->getType());
2771 Rep = CI->getArgOperand(0);
2772 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2773
2774 unsigned NumDstElts = DstTy->getNumElements();
2775 if (NumDstElts < SrcTy->getNumElements()) {
2776 assert(NumDstElts == 2 && "Unexpected vector size");
2777 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2778 }
2779
2780 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2781 bool IsUnsigned = Name.contains("cvtu");
2782 if (IsPS2PD)
2783 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2784 else if (CI->arg_size() == 4 &&
2785 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2786 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2787 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2788 : Intrinsic::x86_avx512_sitofp_round;
2789 Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2790 { DstTy, SrcTy });
2791 Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2792 } else {
2793 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2794 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2795 }
2796
2797 if (CI->arg_size() >= 3)
2798 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2799 CI->getArgOperand(1));
2800 } else if (IsX86 && (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2801 Name.starts_with("vcvtph2ps."))) {
2802 auto *DstTy = cast<FixedVectorType>(CI->getType());
2803 Rep = CI->getArgOperand(0);
2804 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2805 unsigned NumDstElts = DstTy->getNumElements();
2806 if (NumDstElts != SrcTy->getNumElements()) {
2807 assert(NumDstElts == 4 && "Unexpected vector size");
2808 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2809 }
2810 Rep = Builder.CreateBitCast(
2811 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2812 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2813 if (CI->arg_size() >= 3)
2814 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2815 CI->getArgOperand(1));
2816 } else if (IsX86 && Name.starts_with("avx512.mask.load")) {
2817 // "avx512.mask.loadu." or "avx512.mask.load."
2818 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2819 Rep =
2820 upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2821 CI->getArgOperand(2), Aligned);
2822 } else if (IsX86 && Name.starts_with("avx512.mask.expand.load.")) {
2823 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2824 Type *PtrTy = ResultTy->getElementType();
2825
2826 // Cast the pointer to element type.
2827 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2828 llvm::PointerType::getUnqual(PtrTy));
2829
2830 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2831 ResultTy->getNumElements());
2832
2833 Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2834 Intrinsic::masked_expandload,
2835 ResultTy);
2836 Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2837 } else if (IsX86 && Name.starts_with("avx512.mask.compress.store.")) {
2838 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2839 Type *PtrTy = ResultTy->getElementType();
2840
2841 // Cast the pointer to element type.
2842 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2843 llvm::PointerType::getUnqual(PtrTy));
2844
2845 Value *MaskVec =
2846 getX86MaskVec(Builder, CI->getArgOperand(2),
2847 cast<FixedVectorType>(ResultTy)->getNumElements());
2848
2849 Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2850 Intrinsic::masked_compressstore,
2851 ResultTy);
2852 Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2853 } else if (IsX86 && (Name.starts_with("avx512.mask.compress.") ||
2854 Name.starts_with("avx512.mask.expand."))) {
2855 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2856
2857 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2858 ResultTy->getNumElements());
2859
2860 bool IsCompress = Name[12] == 'c';
2861 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2862 : Intrinsic::x86_avx512_mask_expand;
2863 Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2864 Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2865 MaskVec });
2866 } else if (IsX86 && Name.starts_with("xop.vpcom")) {
2867 bool IsSigned;
2868 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2869 Name.ends_with("uq"))
2870 IsSigned = false;
2871 else if (Name.ends_with("b") || Name.ends_with("w") || Name.ends_with("d") ||
2872 Name.ends_with("q"))
2873 IsSigned = true;
2874 else
2875 llvm_unreachable("Unknown suffix");
2876
2877 unsigned Imm;
2878 if (CI->arg_size() == 3) {
2879 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2880 } else {
2881 Name = Name.substr(9); // strip off "xop.vpcom"
2882 if (Name.starts_with("lt"))
2883 Imm = 0;
2884 else if (Name.starts_with("le"))
2885 Imm = 1;
2886 else if (Name.starts_with("gt"))
2887 Imm = 2;
2888 else if (Name.starts_with("ge"))
2889 Imm = 3;
2890 else if (Name.starts_with("eq"))
2891 Imm = 4;
2892 else if (Name.starts_with("ne"))
2893 Imm = 5;
2894 else if (Name.starts_with("false"))
2895 Imm = 6;
2896 else if (Name.starts_with("true"))
2897 Imm = 7;
2898 else
2899 llvm_unreachable("Unknown condition");
2900 }
2901
2902 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2903 } else if (IsX86 && Name.starts_with("xop.vpcmov")) {
2904 Value *Sel = CI->getArgOperand(2);
2905 Value *NotSel = Builder.CreateNot(Sel);
2906 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2907 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2908 Rep = Builder.CreateOr(Sel0, Sel1);
2909 } else if (IsX86 && (Name.starts_with("xop.vprot") ||
2910 Name.starts_with("avx512.prol") ||
2911 Name.starts_with("avx512.mask.prol"))) {
2912 Rep = upgradeX86Rotate(Builder, *CI, false);
2913 } else if (IsX86 && (Name.starts_with("avx512.pror") ||
2914 Name.starts_with("avx512.mask.pror"))) {
2915 Rep = upgradeX86Rotate(Builder, *CI, true);
2916 } else if (IsX86 && (Name.starts_with("avx512.vpshld.") ||
2917 Name.starts_with("avx512.mask.vpshld") ||
2918 Name.starts_with("avx512.maskz.vpshld"))) {
2919 bool ZeroMask = Name[11] == 'z';
2920 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2921 } else if (IsX86 && (Name.starts_with("avx512.vpshrd.") ||
2922 Name.starts_with("avx512.mask.vpshrd") ||
2923 Name.starts_with("avx512.maskz.vpshrd"))) {
2924 bool ZeroMask = Name[11] == 'z';
2925 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2926 } else if (IsX86 && Name == "sse42.crc32.64.8") {
2927 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
2928 Intrinsic::x86_sse42_crc32_32_8);
2929 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2930 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2931 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2932 } else if (IsX86 && (Name.starts_with("avx.vbroadcast.s") ||
2933 Name.starts_with("avx512.vbroadcast.s"))) {
2934 // Replace broadcasts with a series of insertelements.
2935 auto *VecTy = cast<FixedVectorType>(CI->getType());
2936 Type *EltTy = VecTy->getElementType();
2937 unsigned EltNum = VecTy->getNumElements();
2938 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2939 Type *I32Ty = Type::getInt32Ty(C);
2940 Rep = PoisonValue::get(VecTy);
2941 for (unsigned I = 0; I < EltNum; ++I)
2942 Rep = Builder.CreateInsertElement(Rep, Load,
2943 ConstantInt::get(I32Ty, I));
2944 } else if (IsX86 && (Name.starts_with("sse41.pmovsx") ||
2945 Name.starts_with("sse41.pmovzx") ||
2946 Name.starts_with("avx2.pmovsx") ||
2947 Name.starts_with("avx2.pmovzx") ||
2948 Name.starts_with("avx512.mask.pmovsx") ||
2949 Name.starts_with("avx512.mask.pmovzx"))) {
2950 auto *DstTy = cast<FixedVectorType>(CI->getType());
2951 unsigned NumDstElts = DstTy->getNumElements();
2952
2953 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2954 SmallVector<int, 8> ShuffleMask(NumDstElts);
2955 for (unsigned i = 0; i != NumDstElts; ++i)
2956 ShuffleMask[i] = i;
2957
2958 Value *SV =
2959 Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2960
2961 bool DoSext = Name.contains("pmovsx");
2962 Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
2963 : Builder.CreateZExt(SV, DstTy);
2964 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2965 if (CI->arg_size() == 3)
2966 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2967 CI->getArgOperand(1));
2968 } else if (Name == "avx512.mask.pmov.qd.256" ||
2969 Name == "avx512.mask.pmov.qd.512" ||
2970 Name == "avx512.mask.pmov.wb.256" ||
2971 Name == "avx512.mask.pmov.wb.512") {
2972 Type *Ty = CI->getArgOperand(1)->getType();
2973 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2974 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2975 CI->getArgOperand(1));
2976 } else if (IsX86 && (Name.starts_with("avx.vbroadcastf128") ||
2977 Name == "avx2.vbroadcasti128")) {
2978 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2979 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2980 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2981 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2982 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2983 PointerType::getUnqual(VT));
2984 Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2985 if (NumSrcElts == 2)
2986 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2987 else
2988 Rep = Builder.CreateShuffleVector(
2989 Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2990 } else if (IsX86 && (Name.starts_with("avx512.mask.shuf.i") ||
2991 Name.starts_with("avx512.mask.shuf.f"))) {
2992 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2993 Type *VT = CI->getType();
2994 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2995 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2996 unsigned ControlBitsMask = NumLanes - 1;
2997 unsigned NumControlBits = NumLanes / 2;
2998 SmallVector<int, 8> ShuffleMask(0);
2999
3000 for (unsigned l = 0; l != NumLanes; ++l) {
3001 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3002 // We actually need the other source.
3003 if (l >= NumLanes / 2)
3004 LaneMask += NumLanes;
3005 for (unsigned i = 0; i != NumElementsInLane; ++i)
3006 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3007 }
3008 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3009 CI->getArgOperand(1), ShuffleMask);
3010 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3011 CI->getArgOperand(3));
3012 } else if (IsX86 && (Name.starts_with("avx512.mask.broadcastf") ||
3013 Name.starts_with("avx512.mask.broadcasti"))) {
3014 unsigned NumSrcElts =
3015 cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3016 ->getNumElements();
3017 unsigned NumDstElts =
3018 cast<FixedVectorType>(CI->getType())->getNumElements();
3019
3020 SmallVector<int, 8> ShuffleMask(NumDstElts);
3021 for (unsigned i = 0; i != NumDstElts; ++i)
3022 ShuffleMask[i] = i % NumSrcElts;
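// E.g. broadcasting a <4 x float> source into a <16 x float> result
// repeats the mask pattern <0, 1, 2, 3> four times.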
3023
3024 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3025 CI->getArgOperand(0),
3026 ShuffleMask);
3027 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3028 CI->getArgOperand(1));
3029 } else if (IsX86 && (Name.starts_with("avx2.pbroadcast") ||
3030 Name.starts_with("avx2.vbroadcast") ||
3031 Name.starts_with("avx512.pbroadcast") ||
3032 Name.starts_with("avx512.mask.broadcast.s"))) {
3033 // Replace vp?broadcasts with a vector shuffle.
3034 Value *Op = CI->getArgOperand(0);
3035 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3036 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3037 SmallVector<int, 8> M;
3038 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3039 Rep = Builder.CreateShuffleVector(Op, M);
3040
3041 if (CI->arg_size() == 3)
3042 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3043 CI->getArgOperand(1));
3044 } else if (IsX86 && (Name.starts_with("sse2.padds.") ||
3045 Name.starts_with("avx2.padds.") ||
3046 Name.starts_with("avx512.padds.") ||
3047 Name.starts_with("avx512.mask.padds."))) {
3048 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3049 } else if (IsX86 && (Name.starts_with("sse2.psubs.") ||
3050 Name.starts_with("avx2.psubs.") ||
3051 Name.starts_with("avx512.psubs.") ||
3052 Name.starts_with("avx512.mask.psubs."))) {
3053 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3054 } else if (IsX86 && (Name.starts_with("sse2.paddus.") ||
3055 Name.starts_with("avx2.paddus.") ||
3056 Name.starts_with("avx512.mask.paddus."))) {
3057 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3058 } else if (IsX86 && (Name.starts_with("sse2.psubus.") ||
3059 Name.starts_with("avx2.psubus.") ||
3060 Name.starts_with("avx512.mask.psubus."))) {
3061 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3062 } else if (IsX86 && Name.starts_with("avx512.mask.palignr.")) {
3063 Rep = upgradeX86ALIGNIntrinsics(
3064 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3065 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4),
3066 false);
3067 } else if (IsX86 && Name.starts_with("avx512.mask.valign.")) {
3068 Rep = upgradeX86ALIGNIntrinsics(
3069 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3070 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4),
3071 true);
3072 } else if (IsX86 && (Name == "sse2.psll.dq" ||
3073 Name == "avx2.psll.dq")) {
3074 // 128/256-bit shift left specified in bits.
3075 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3076 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3077 Shift / 8); // Shift is in bits.
3078 } else if (IsX86 && (Name == "sse2.psrl.dq" ||
3079 Name == "avx2.psrl.dq")) {
3080 // 128/256-bit shift right specified in bits.
3081 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3082 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3083 Shift / 8); // Shift is in bits.
3084 } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
3085 Name == "avx2.psll.dq.bs" ||
3086 Name == "avx512.psll.dq.512")) {
3087 // 128/256/512-bit shift left specified in bytes.
3088 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3089 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3090 } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
3091 Name == "avx2.psrl.dq.bs" ||
3092 Name == "avx512.psrl.dq.512")) {
3093 // 128/256/512-bit shift right specified in bytes.
3094 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3095 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3096 } else if (IsX86 && (Name == "sse41.pblendw" ||
3097 Name.starts_with("sse41.blendp") ||
3098 Name.starts_with("avx.blend.p") ||
3099 Name == "avx2.pblendw" ||
3100 Name.starts_with("avx2.pblendd."))) {
3101 Value *Op0 = CI->getArgOperand(0);
3102 Value *Op1 = CI->getArgOperand(1);
3103 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3104 auto *VecTy = cast<FixedVectorType>(CI->getType());
3105 unsigned NumElts = VecTy->getNumElements();
3106
3107 SmallVector<int, 16> Idxs(NumElts);
3108 for (unsigned i = 0; i != NumElts; ++i)
3109 Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
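// For example, sse41.pblendw with Imm = 0xF0 keeps elements 0-3 from
// Op0 and takes elements 4-7 from Op1: Idxs = <0, 1, 2, 3, 12, 13, 14, 15>.
// The (i % 8) wrap handles avx2.pblendw, where the same 8-bit immediate
// applies to each 128-bit lane.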
3110
3111 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3112 } else if (IsX86 && (Name.starts_with("avx.vinsertf128.") ||
3113 Name == "avx2.vinserti128" ||
3114 Name.starts_with("avx512.mask.insert"))) {
3115 Value *Op0 = CI->getArgOperand(0);
3116 Value *Op1 = CI->getArgOperand(1);
3117 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3118 unsigned DstNumElts =
3119 cast<FixedVectorType>(CI->getType())->getNumElements();
3120 unsigned SrcNumElts =
3121 cast<FixedVectorType>(Op1->getType())->getNumElements();
3122 unsigned Scale = DstNumElts / SrcNumElts;
3123
3124 // Mask off the high bits of the immediate value; hardware ignores those.
3125 Imm = Imm % Scale;
3126
3127 // Extend the second operand into a vector the size of the destination.
3128 SmallVector<int, 8> Idxs(DstNumElts);
3129 for (unsigned i = 0; i != SrcNumElts; ++i)
3130 Idxs[i] = i;
3131 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3132 Idxs[i] = SrcNumElts;
3133 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3134
3135 // Insert the second operand into the first operand.
3136
3137 // Note that there is no guarantee that instruction lowering will actually
3138 // produce a vinsertf128 instruction for the created shuffles. In
3139 // particular, the 0 immediate case involves no lane changes, so it can
3140 // be handled as a blend.
3141
3142 // Example of shuffle mask for 32-bit elements:
3143 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3144 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3145
3146 // First fill with the identity mask.
3147 for (unsigned i = 0; i != DstNumElts; ++i)
3148 Idxs[i] = i;
3149 // Then replace the elements where we need to insert.
3150 for (unsigned i = 0; i != SrcNumElts; ++i)
3151 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3152 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3153
3154 // If the intrinsic has a mask operand, handle that.
3155 if (CI->arg_size() == 5)
3156 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3157 CI->getArgOperand(3));
3158 } else if (IsX86 && (Name.starts_with("avx.vextractf128.") ||
3159 Name == "avx2.vextracti128" ||
3160 Name.starts_with("avx512.mask.vextract"))) {
3161 Value *Op0 = CI->getArgOperand(0);
3162 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3163 unsigned DstNumElts =
3164 cast<FixedVectorType>(CI->getType())->getNumElements();
3165 unsigned SrcNumElts =
3166 cast<FixedVectorType>(Op0->getType())->getNumElements();
3167 unsigned Scale = SrcNumElts / DstNumElts;
3168
3169 // Mask off the high bits of the immediate value; hardware ignores those.
3170 Imm = Imm % Scale;
3171
3172 // Get indexes for the subvector of the input vector.
3173 SmallVector<int, 8> Idxs(DstNumElts);
3174 for (unsigned i = 0; i != DstNumElts; ++i) {
3175 Idxs[i] = i + (Imm * DstNumElts);
3176 }
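// E.g. extracting the upper half (Imm = 1) of an <8 x float> source
// into a <4 x float> result gives Idxs = <4, 5, 6, 7>.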
3177 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3178
3179 // If the intrinsic has a mask operand, handle that.
3180 if (CI->arg_size() == 4)
3181 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3182 CI->getArgOperand(2));
3183 } else if (!IsX86 && Name == "stackprotectorcheck") {
3184 Rep = nullptr;
3185 } else if (IsX86 && (Name.starts_with("avx512.mask.perm.df.") ||
3186 Name.starts_with("avx512.mask.perm.di."))) {
3187 Value *Op0 = CI->getArgOperand(0);
3188 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3189 auto *VecTy = cast<FixedVectorType>(CI->getType());
3190 unsigned NumElts = VecTy->getNumElements();
3191
3192 SmallVector<int, 8> Idxs(NumElts);
3193 for (unsigned i = 0; i != NumElts; ++i)
3194 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
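// E.g. Imm = 0x1B reverses each group of four elements
// (Idxs = <3, 2, 1, 0>); the (i & ~0x3) term repeats that pattern
// across each 256-bit group of a 512-bit vector.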
3195
3196 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3197
3198 if (CI->arg_size() == 4)
3199 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3200 CI->getArgOperand(2));
3201 } else if (IsX86 && (Name.starts_with("avx.vperm2f128.") ||
3202 Name == "avx2.vperm2i128")) {
3203 // The immediate permute control byte looks like this:
3204 // [1:0] - select 128 bits from sources for low half of destination
3205 // [2] - ignore
3206 // [3] - zero low half of destination
3207 // [5:4] - select 128 bits from sources for high half of destination
3208 // [6] - ignore
3209 // [7] - zero high half of destination
3210
3211 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3212
3213 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3214 unsigned HalfSize = NumElts / 2;
3215 SmallVector<int, 8> ShuffleMask(NumElts);
3216
3217 // Determine which operand(s) are actually in use for this instruction.
3218 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3219 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3220
3221 // If needed, replace operands based on zero mask.
3222 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3223 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3224
3225 // Permute low half of result.
3226 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3227 for (unsigned i = 0; i < HalfSize; ++i)
3228 ShuffleMask[i] = StartIndex + i;
3229
3230 // Permute high half of result.
3231 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3232 for (unsigned i = 0; i < HalfSize; ++i)
3233 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
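// Worked example: with <8 x float> operands and Imm = 0x31, the low
// half of the result is the high half of the first source (Imm[1:0] = 1)
// and the high half is the high half of the second source
// (Imm[5:4] = 3), giving ShuffleMask = <4, 5, 6, 7, 12, 13, 14, 15>.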
3234
3235 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3236
3237 } else if (IsX86 && (Name.starts_with("avx.vpermil.") ||
3238 Name == "sse2.pshuf.d" ||
3239 Name.starts_with("avx512.mask.vpermil.p") ||
3240 Name.starts_with("avx512.mask.pshuf.d."))) {
3241 Value *Op0 = CI->getArgOperand(0);
3242 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3243 auto *VecTy = cast<FixedVectorType>(CI->getType());
3244 unsigned NumElts = VecTy->getNumElements();
3245 // Calculate the size of each index in the immediate.
3246 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3247 unsigned IdxMask = ((1 << IdxSize) - 1);
3248
3249 SmallVector<int, 8> Idxs(NumElts);
3250 // Look up the bits for this element, wrapping around the immediate
3251 // every 8 bits. Elements are grouped into sets of 2 or 4 elements,
3252 // so we need to offset by the first index of each group.
3253 for (unsigned i = 0; i != NumElts; ++i)
3254 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
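// E.g. sse2.pshuf.d with Imm = 0x1B (IdxSize = 2, IdxMask = 3) yields
// Idxs = <3, 2, 1, 0>, while vpermilpd uses a single control bit per
// element (IdxSize = 1, IdxMask = 1).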
3255
3256 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3257
3258 if (CI->arg_size() == 4)
3259 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3260 CI->getArgOperand(2));
3261 } else if (IsX86 && (Name == "sse2.pshufl.w" ||
3262 Name.starts_with("avx512.mask.pshufl.w."))) {
3263 Value *Op0 = CI->getArgOperand(0);
3264 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3265 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3266
3267 SmallVector<int, 16> Idxs(NumElts);
3268 for (unsigned l = 0; l != NumElts; l += 8) {
3269 for (unsigned i = 0; i != 4; ++i)
3270 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3271 for (unsigned i = 4; i != 8; ++i)
3272 Idxs[i + l] = i + l;
3273 }
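// E.g. Imm = 0x1B reverses the low four words of each 128-bit lane,
// giving Idxs = <3, 2, 1, 0, 4, 5, 6, 7> in the v8i16 case; the
// pshufh.w handler below mirrors this for the high four words.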
3274
3275 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3276
3277 if (CI->arg_size() == 4)
3278 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3279 CI->getArgOperand(2));
3280 } else if (IsX86 && (Name == "sse2.pshufh.w" ||
3281 Name.starts_with("avx512.mask.pshufh.w."))) {
3282 Value *Op0 = CI->getArgOperand(0);
3283 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3284 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3285
3286 SmallVector<int, 16> Idxs(NumElts);
3287 for (unsigned l = 0; l != NumElts; l += 8) {
3288 for (unsigned i = 0; i != 4; ++i)
3289 Idxs[i + l] = i + l;
3290 for (unsigned i = 0; i != 4; ++i)
3291 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3292 }
3293
3294 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3295
3296 if (CI->arg_size() == 4)
3297 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3298 CI->getArgOperand(2));
3299 } else if (IsX86 && Name.starts_with("avx512.mask.shuf.p")) {
3300 Value *Op0 = CI->getArgOperand(0);
3301 Value *Op1 = CI->getArgOperand(1);
3302 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3303 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3304
3305 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3306 unsigned HalfLaneElts = NumLaneElts / 2;
3307
3308 SmallVector<int, 16> Idxs(NumElts);
3309 for (unsigned i = 0; i != NumElts; ++i) {
3310 // Base index is the starting element of the lane.
3311 Idxs[i] = i - (i % NumLaneElts);
3312 // If we are halfway through the lane, switch to the other source.
3313 if ((i % NumLaneElts) >= HalfLaneElts)
3314 Idxs[i] += NumElts;
3315 // Now select the specific element by adding HalfLaneElts bits from
3316 // the immediate, wrapping around the immediate every 8 bits.
3317 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3318 }
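// Worked example: a 128-bit shufps with Imm = 0xE4 (NumLaneElts = 4,
// HalfLaneElts = 2) produces Idxs = <0, 1, 6, 7>: two elements selected
// from Op0 followed by two from Op1, each picked by two immediate bits.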
3319
3320 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3321
3322 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3323 CI->getArgOperand(3));
3324 } else if (IsX86 && (Name.starts_with("avx512.mask.movddup") ||
3325 Name.starts_with("avx512.mask.movshdup") ||
3326 Name.starts_with("avx512.mask.movsldup"))) {
3327 Value *Op0 = CI->getArgOperand(0);
3328 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3329 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3330
3331 unsigned Offset = 0;
3332 if (Name.starts_with("avx512.mask.movshdup."))
3333 Offset = 1;
3334
3335 SmallVector<int, 16> Idxs(NumElts);
3336 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3337 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3338 Idxs[i + l + 0] = i + l + Offset;
3339 Idxs[i + l + 1] = i + l + Offset;
3340 }
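// E.g. for a <4 x float> operand this produces <0, 0, 2, 2> for
// movsldup and <1, 1, 3, 3> for movshdup; movddup on <2 x double>
// produces <0, 0>.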
3341
3342 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3343
3344 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3345 CI->getArgOperand(1));
3346 } else if (IsX86 && (Name.starts_with("avx512.mask.punpckl") ||
3347 Name.starts_with("avx512.mask.unpckl."))) {
3348 Value *Op0 = CI->getArgOperand(0);
3349 Value *Op1 = CI->getArgOperand(1);
3350 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3351 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3352
3353 SmallVector<int, 64> Idxs(NumElts);
3354 for (int l = 0; l != NumElts; l += NumLaneElts)
3355 for (int i = 0; i != NumLaneElts; ++i)
3356 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
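// E.g. unpcklps on <4 x float> interleaves the low halves of the two
// sources: Idxs = <0, 4, 1, 5>. The punpckh handler below starts at
// NumLaneElts / 2 instead, giving <2, 6, 3, 7>.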
3357
3358 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3359
3360 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3361 CI->getArgOperand(2));
3362 } else if (IsX86 && (Name.starts_with("avx512.mask.punpckh") ||
3363 Name.starts_with("avx512.mask.unpckh."))) {
3364 Value *Op0 = CI->getArgOperand(0);
3365 Value *Op1 = CI->getArgOperand(1);
3366 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3367 int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
3368
3369 SmallVector<int, 64> Idxs(NumElts);
3370 for (int l = 0; l != NumElts; l += NumLaneElts)
3371 for (int i = 0; i != NumLaneElts; ++i)
3372 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3373
3374 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3375
3376 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3377 CI->getArgOperand(2));
3378 } else if (IsX86 && (Name.starts_with("avx512.mask.and.") ||
3379 Name.starts_with("avx512.mask.pand."))) {
3380 VectorType *FTy = cast<VectorType>(CI->getType());
3381 VectorType *ITy = VectorType::getInteger(FTy);
3382 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3383 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3384 Rep = Builder.CreateBitCast(Rep, FTy);
3385 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3386 CI->getArgOperand(2));
3387 } else if (IsX86 && (Name.starts_with("avx512.mask.andn.") ||
3388 Name.starts_with("avx512.mask.pandn."))) {
3389 VectorType *FTy = cast<VectorType>(CI->getType());
3390 VectorType *ITy = VectorType::getInteger(FTy);
3391 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3392 Rep = Builder.CreateAnd(Rep,
3393 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3394 Rep = Builder.CreateBitCast(Rep, FTy);
3395 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3396 CI->getArgOperand(2));
3397 } else if (IsX86 && (Name.starts_with("avx512.mask.or.") ||
3398 Name.starts_with("avx512.mask.por."))) {
3399 VectorType *FTy = cast<VectorType>(CI->getType());
3400 VectorType *ITy = VectorType::getInteger(FTy);
3401 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3402 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3403 Rep = Builder.CreateBitCast(Rep, FTy);
3404 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3405 CI->getArgOperand(2));
3406 } else if (IsX86 && (Name.starts_with("avx512.mask.xor.") ||
3407 Name.starts_with("avx512.mask.pxor."))) {
3408 VectorType *FTy = cast<VectorType>(CI->getType());
3409 VectorType *ITy = VectorType::getInteger(FTy);
3410 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3411 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3412 Rep = Builder.CreateBitCast(Rep, FTy);
3413 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3414 CI->getArgOperand(2));
3415 } else if (IsX86 && Name.starts_with("avx512.mask.padd.")) {
3416 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3417 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3418 CI->getArgOperand(2));
3419 } else if (IsX86 && Name.starts_with("avx512.mask.psub.")) {
3420 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3421 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3422 CI->getArgOperand(2));
3423 } else if (IsX86 && Name.starts_with("avx512.mask.pmull.")) {
3424 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3425 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3426 CI->getArgOperand(2));
3427 } else if (IsX86 && Name.starts_with("avx512.mask.add.p")) {
3428 if (Name.ends_with(".512")) {
3429 Intrinsic::ID IID;
3430 if (Name[17] == 's')
3431 IID = Intrinsic::x86_avx512_add_ps_512;
3432 else
3433 IID = Intrinsic::x86_avx512_add_pd_512;
3434
3435 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3436 { CI->getArgOperand(0), CI->getArgOperand(1),
3437 CI->getArgOperand(4) });
3438 } else {
3439 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3440 }
3441 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3442 CI->getArgOperand(2));
3443 } else if (IsX86 && Name.starts_with("avx512.mask.div.p")) {
3444 if (Name.ends_with(".512")) {
3445 Intrinsic::ID IID;
3446 if (Name[17] == 's')
3447 IID = Intrinsic::x86_avx512_div_ps_512;
3448 else
3449 IID = Intrinsic::x86_avx512_div_pd_512;
3450
3451 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3452 { CI->getArgOperand(0), CI->getArgOperand(1),
3453 CI->getArgOperand(4) });
3454 } else {
3455 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3456 }
3457 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3458 CI->getArgOperand(2));
3459 } else if (IsX86 && Name.starts_with("avx512.mask.mul.p")) {
3460 if (Name.ends_with(".512")) {
3461 Intrinsic::ID IID;
3462 if (Name[17] == 's')
3463 IID = Intrinsic::x86_avx512_mul_ps_512;
3464 else
3465 IID = Intrinsic::x86_avx512_mul_pd_512;
3466
3467 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3468 { CI->getArgOperand(0), CI->getArgOperand(1),
3469 CI->getArgOperand(4) });
3470 } else {
3471 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3472 }
3473 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3474 CI->getArgOperand(2));
3475 } else if (IsX86 && Name.starts_with("avx512.mask.sub.p")) {
3476 if (Name.ends_with(".512")) {
3477 Intrinsic::ID IID;
3478 if (Name[17] == 's')
3479 IID = Intrinsic::x86_avx512_sub_ps_512;
3480 else
3481 IID = Intrinsic::x86_avx512_sub_pd_512;
3482
3483 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3484 { CI->getArgOperand(0), CI->getArgOperand(1),
3485 CI->getArgOperand(4) });
3486 } else {
3487 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3488 }
3489 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3490 CI->getArgOperand(2));
3491 } else if (IsX86 && (Name.starts_with("avx512.mask.max.p") ||
3492 Name.starts_with("avx512.mask.min.p")) &&
3493 Name.drop_front(18) == ".512") {
3494 bool IsDouble = Name[17] == 'd';
3495 bool IsMin = Name[13] == 'i';
3496 static const Intrinsic::ID MinMaxTbl[2][2] = {
3497 { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
3498 { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
3499 };
3500 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3501
3502 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3503 { CI->getArgOperand(0), CI->getArgOperand(1),
3504 CI->getArgOperand(4) });
3505 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3506 CI->getArgOperand(2));
3507 } else if (IsX86 && Name.starts_with("avx512.mask.lzcnt.")) {
3508 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
3509 Intrinsic::ctlz,
3510 CI->getType()),
3511 { CI->getArgOperand(0), Builder.getInt1(false) });
3512 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3513 CI->getArgOperand(1));
3514 } else if (IsX86 && Name.starts_with("avx512.mask.psll")) {
3515 bool IsImmediate = Name[16] == 'i' ||
3516 (Name.size() > 18 && Name[18] == 'i');
3517 bool IsVariable = Name[16] == 'v';
3518 char Size = Name[16] == '.' ? Name[17] :
3519 Name[17] == '.' ? Name[18] :
3520 Name[18] == '.' ? Name[19] :
3521 Name[20];
3522
3523 Intrinsic::ID IID;
3524 if (IsVariable && Name[17] != '.') {
3525 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3526 IID = Intrinsic::x86_avx2_psllv_q;
3527 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3528 IID = Intrinsic::x86_avx2_psllv_q_256;
3529 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3530 IID = Intrinsic::x86_avx2_psllv_d;
3531 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3532 IID = Intrinsic::x86_avx2_psllv_d_256;
3533 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3534 IID = Intrinsic::x86_avx512_psllv_w_128;
3535 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3536 IID = Intrinsic::x86_avx512_psllv_w_256;
3537 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3538 IID = Intrinsic::x86_avx512_psllv_w_512;
3539 else
3540 llvm_unreachable("Unexpected size");
3541 } else if (Name.ends_with(".128")) {
3542 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3543 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3544 : Intrinsic::x86_sse2_psll_d;
3545 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3546 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3547 : Intrinsic::x86_sse2_psll_q;
3548 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3549 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3550 : Intrinsic::x86_sse2_psll_w;
3551 else
3552 llvm_unreachable("Unexpected size");
3553 } else if (Name.ends_with(".256")) {
3554 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3555 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3556 : Intrinsic::x86_avx2_psll_d;
3557 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3558 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3559 : Intrinsic::x86_avx2_psll_q;
3560 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3561 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3562 : Intrinsic::x86_avx2_psll_w;
3563 else
3564 llvm_unreachable("Unexpected size");
3565 } else {
3566 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3567 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
3568 IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
3569 Intrinsic::x86_avx512_psll_d_512;
3570 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3571 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
3572 IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
3573 Intrinsic::x86_avx512_psll_q_512;
3574 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3575 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3576 : Intrinsic::x86_avx512_psll_w_512;
3577 else
3578 llvm_unreachable("Unexpected size");
3579 }
3580
3581 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3582 } else if (IsX86 && Name.starts_with("avx512.mask.psrl")) {
3583 bool IsImmediate = Name[16] == 'i' ||
3584 (Name.size() > 18 && Name[18] == 'i');
3585 bool IsVariable = Name[16] == 'v';
3586 char Size = Name[16] == '.' ? Name[17] :
3587 Name[17] == '.' ? Name[18] :
3588 Name[18] == '.' ? Name[19] :
3589 Name[20];
3590
3591 Intrinsic::ID IID;
3592 if (IsVariable && Name[17] != '.') {
3593 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3594 IID = Intrinsic::x86_avx2_psrlv_q;
3595 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3596 IID = Intrinsic::x86_avx2_psrlv_q_256;
3597 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3598 IID = Intrinsic::x86_avx2_psrlv_d;
3599 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3600 IID = Intrinsic::x86_avx2_psrlv_d_256;
3601 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3602 IID = Intrinsic::x86_avx512_psrlv_w_128;
3603 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3604 IID = Intrinsic::x86_avx512_psrlv_w_256;
3605 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3606 IID = Intrinsic::x86_avx512_psrlv_w_512;
3607 else
3608 llvm_unreachable("Unexpected size");
3609 } else if (Name.ends_with(".128")) {
3610 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3611 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3612 : Intrinsic::x86_sse2_psrl_d;
3613 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3614 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3615 : Intrinsic::x86_sse2_psrl_q;
3616 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3617 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3618 : Intrinsic::x86_sse2_psrl_w;
3619 else
3620 llvm_unreachable("Unexpected size");
3621 } else if (Name.ends_with(".256")) {
3622 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3623 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3624 : Intrinsic::x86_avx2_psrl_d;
3625 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3626 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3627 : Intrinsic::x86_avx2_psrl_q;
3628 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3629 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3630 : Intrinsic::x86_avx2_psrl_w;
3631 else
3632 llvm_unreachable("Unexpected size");
3633 } else {
3634 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3635 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
3636 IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
3637 Intrinsic::x86_avx512_psrl_d_512;
3638 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3639 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
3640 IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
3641 Intrinsic::x86_avx512_psrl_q_512;
3642 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3643 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3644 : Intrinsic::x86_avx512_psrl_w_512;
3645 else
3646 llvm_unreachable("Unexpected size");
3647 }
3648
3649 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3650 } else if (IsX86 && Name.starts_with("avx512.mask.psra")) {
3651 bool IsImmediate = Name[16] == 'i' ||
3652 (Name.size() > 18 && Name[18] == 'i');
3653 bool IsVariable = Name[16] == 'v';
3654 char Size = Name[16] == '.' ? Name[17] :
3655 Name[17] == '.' ? Name[18] :
3656 Name[18] == '.' ? Name[19] :
3657 Name[20];
3658
3659 Intrinsic::ID IID;
3660 if (IsVariable && Name[17] != '.') {
3661 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3662 IID = Intrinsic::x86_avx2_psrav_d;
3663 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3664 IID = Intrinsic::x86_avx2_psrav_d_256;
3665 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3666 IID = Intrinsic::x86_avx512_psrav_w_128;
3667 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3668 IID = Intrinsic::x86_avx512_psrav_w_256;
3669 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3670 IID = Intrinsic::x86_avx512_psrav_w_512;
3671 else
3672 llvm_unreachable("Unexpected size");
3673 } else if (Name.ends_with(".128")) {
3674 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3675 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3676 : Intrinsic::x86_sse2_psra_d;
3677 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3678 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
3679 IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
3680 Intrinsic::x86_avx512_psra_q_128;
3681 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3682 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3683 : Intrinsic::x86_sse2_psra_w;
3684 else
3685 llvm_unreachable("Unexpected size");
3686 } else if (Name.ends_with(".256")) {
3687 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3688 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3689 : Intrinsic::x86_avx2_psra_d;
3690 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3691 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
3692 IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
3693 Intrinsic::x86_avx512_psra_q_256;
3694 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3695 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3696 : Intrinsic::x86_avx2_psra_w;
3697 else
3698 llvm_unreachable("Unexpected size");
3699 } else {
3700 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3701 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
3702 IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
3703 Intrinsic::x86_avx512_psra_d_512;
3704 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3705 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
3706 IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
3707 Intrinsic::x86_avx512_psra_q_512;
3708 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3709 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3710 : Intrinsic::x86_avx512_psra_w_512;
3711 else
3712 llvm_unreachable("Unexpected size");
3713 }
3714
3715 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3716 } else if (IsX86 && Name.starts_with("avx512.mask.move.s")) {
3717 Rep = upgradeMaskedMove(Builder, *CI);
3718 } else if (IsX86 && Name.starts_with("avx512.cvtmask2")) {
3719 Rep = upgradeMaskToInt(Builder, *CI);
3720 } else if (IsX86 && Name.ends_with(".movntdqa")) {
3721 MDNode *Node = MDNode::get(
3722 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3723
3724 Value *Ptr = CI->getArgOperand(0);
3725
3726 // Convert the type of the pointer to a pointer to the stored type.
3727 Value *BC = Builder.CreateBitCast(
3728 Ptr, PointerType::getUnqual(CI->getType()), "cast");
3729 LoadInst *LI = Builder.CreateAlignedLoad(
3730 CI->getType(), BC,
3731 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
3732 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3733 Rep = LI;
3734 } else if (IsX86 && (Name.starts_with("fma.vfmadd.") ||
3735 Name.starts_with("fma.vfmsub.") ||
3736 Name.starts_with("fma.vfnmadd.") ||
3737 Name.starts_with("fma.vfnmsub."))) {
3738 bool NegMul = Name[6] == 'n';
3739 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3740 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3741
3742 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3743 CI->getArgOperand(2) };
3744
3745 if (IsScalar) {
3746 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3747 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3748 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3749 }
3750
3751 if (NegMul && !IsScalar)
3752 Ops[0] = Builder.CreateFNeg(Ops[0]);
3753 if (NegMul && IsScalar)
3754 Ops[1] = Builder.CreateFNeg(Ops[1]);
3755 if (NegAcc)
3756 Ops[2] = Builder.CreateFNeg(Ops[2]);
3757
3758 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3759 Intrinsic::fma,
3760 Ops[0]->getType()),
3761 Ops);
3762
3763 if (IsScalar)
3764 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3765 (uint64_t)0);
3766 } else if (IsX86 && Name.starts_with("fma4.vfmadd.s")) {
3767 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3768 CI->getArgOperand(2) };
3769
3770 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3771 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3772 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3773
3774 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3775 Intrinsic::fma,
3776 Ops[0]->getType()),
3777 Ops);
3778
3779 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3780 Rep, (uint64_t)0);
3781 } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.s") ||
3782 Name.starts_with("avx512.maskz.vfmadd.s") ||
3783 Name.starts_with("avx512.mask3.vfmadd.s") ||
3784 Name.starts_with("avx512.mask3.vfmsub.s") ||
3785 Name.starts_with("avx512.mask3.vfnmsub.s"))) {
3786 bool IsMask3 = Name[11] == '3';
3787 bool IsMaskZ = Name[11] == 'z';
3788 // Drop the "avx512.mask." to make it easier.
3789 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3790 bool NegMul = Name[2] == 'n';
3791 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3792
3793 Value *A = CI->getArgOperand(0);
3794 Value *B = CI->getArgOperand(1);
3795 Value *C = CI->getArgOperand(2);
3796
3797 if (NegMul && (IsMask3 || IsMaskZ))
3798 A = Builder.CreateFNeg(A);
3799 if (NegMul && !(IsMask3 || IsMaskZ))
3800 B = Builder.CreateFNeg(B);
3801 if (NegAcc)
3802 C = Builder.CreateFNeg(C);
3803
3804 A = Builder.CreateExtractElement(A, (uint64_t)0);
3805 B = Builder.CreateExtractElement(B, (uint64_t)0);
3806 C = Builder.CreateExtractElement(C, (uint64_t)0);
3807
3808 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3809 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3810 Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3811
3812 Intrinsic::ID IID;
3813 if (Name.back() == 'd')
3814 IID = Intrinsic::x86_avx512_vfmadd_f64;
3815 else
3816 IID = Intrinsic::x86_avx512_vfmadd_f32;
3817 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3818 Rep = Builder.CreateCall(FMA, Ops);
3819 } else {
3820 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3821 Intrinsic::fma,
3822 A->getType());
3823 Rep = Builder.CreateCall(FMA, { A, B, C });
3824 }
3825
3826 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3827 IsMask3 ? C : A;
3828
3829 // For Mask3 with NegAcc, we need to create a new extractelement that
3830 // avoids the negation above.
3831 if (NegAcc && IsMask3)
3832 PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3833 (uint64_t)0);
3834
3835 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3836 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3837 Rep, (uint64_t)0);
3838 } else if (IsX86 && (Name.starts_with("avx512.mask.vfmadd.p") ||
3839 Name.starts_with("avx512.mask.vfnmadd.p") ||
3840 Name.starts_with("avx512.mask.vfnmsub.p") ||
3841 Name.starts_with("avx512.mask3.vfmadd.p") ||
3842 Name.starts_with("avx512.mask3.vfmsub.p") ||
3843 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3844 Name.starts_with("avx512.maskz.vfmadd.p"))) {
3845 bool IsMask3 = Name[11] == '3';
3846 bool IsMaskZ = Name[11] == 'z';
3847 // Drop the "avx512.mask." to make it easier.
3848 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3849 bool NegMul = Name[2] == 'n';
3850 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3851
3852 Value *A = CI->getArgOperand(0);
3853 Value *B = CI->getArgOperand(1);
3854 Value *C = CI->getArgOperand(2);
3855
3856 if (NegMul && (IsMask3 || IsMaskZ))
3857 A = Builder.CreateFNeg(A);
3858 if (NegMul && !(IsMask3 || IsMaskZ))
3859 B = Builder.CreateFNeg(B);
3860 if (NegAcc)
3861 C = Builder.CreateFNeg(C);
3862
3863 if (CI->arg_size() == 5 &&
3864 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3865 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3866 Intrinsic::ID IID;
3867 // Check the character before ".512" in the string.
3868 if (Name[Name.size()-5] == 's')
3869 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3870 else
3871 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3872
3873 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3874 { A, B, C, CI->getArgOperand(4) });
3875 } else {
3876 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3877 Intrinsic::fma,
3878 A->getType());
3879 Rep = Builder.CreateCall(FMA, { A, B, C });
3880 }
3881
3882 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3883 IsMask3 ? CI->getArgOperand(2) :
3884 CI->getArgOperand(0);
3885
3886 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3887 } else if (IsX86 && Name.starts_with("fma.vfmsubadd.p")) {
3888 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3889 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3890 Intrinsic::ID IID;
3891 if (VecWidth == 128 && EltWidth == 32)
3892 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3893 else if (VecWidth == 256 && EltWidth == 32)
3894 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3895 else if (VecWidth == 128 && EltWidth == 64)
3896 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3897 else if (VecWidth == 256 && EltWidth == 64)
3898 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3899 else
3900 llvm_unreachable("Unexpected intrinsic");
3901
3902 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3903 CI->getArgOperand(2) };
3904 Ops[2] = Builder.CreateFNeg(Ops[2]);
3905 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3906 Ops);
3907 } else if (IsX86 && (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3908 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3909 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3910 Name.starts_with("avx512.mask3.vfmsubadd.p"))) {
3911 bool IsMask3 = Name[11] == '3';
3912 bool IsMaskZ = Name[11] == 'z';
3913 // Drop the "avx512.mask." to make it easier.
3914 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3915 bool IsSubAdd = Name[3] == 's';
3916 if (CI->arg_size() == 5) {
3917 Intrinsic::ID IID;
3918 // Check the character before ".512" in the string.
3919 if (Name[Name.size()-5] == 's')
3920 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3921 else
3922 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3923
3924 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3925 CI->getArgOperand(2), CI->getArgOperand(4) };
3926 if (IsSubAdd)
3927 Ops[2] = Builder.CreateFNeg(Ops[2]);
3928
3929 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3930 Ops);
3931 } else {
3932 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3933
3934 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3935 CI->getArgOperand(2) };
3936
3937 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3938 Ops[0]->getType());
3939 Value *Odd = Builder.CreateCall(FMA, Ops);
3940 Ops[2] = Builder.CreateFNeg(Ops[2]);
3941 Value *Even = Builder.CreateCall(FMA, Ops);
3942
3943 if (IsSubAdd)
3944 std::swap(Even, Odd);
3945
3946 SmallVector<int, 32> Idxs(NumElts);
3947 for (int i = 0; i != NumElts; ++i)
3948 Idxs[i] = i + (i % 2) * NumElts;
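// E.g. for 4 elements Idxs = <0, 5, 2, 7>: even lanes come from Even
// and odd lanes from Odd, producing the alternating add/sub pattern.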
3949
3950 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3951 }
3952
3953 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3954 IsMask3 ? CI->getArgOperand(2) :
3955 CI->getArgOperand(0);
3956
3957 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3958 } else if (IsX86 && (Name.starts_with("avx512.mask.pternlog.") ||
3959 Name.starts_with("avx512.maskz.pternlog."))) {
3960 bool ZeroMask = Name[11] == 'z';
3961 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3962 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3963 Intrinsic::ID IID;
3964 if (VecWidth == 128 && EltWidth == 32)
3965 IID = Intrinsic::x86_avx512_pternlog_d_128;
3966 else if (VecWidth == 256 && EltWidth == 32)
3967 IID = Intrinsic::x86_avx512_pternlog_d_256;
3968 else if (VecWidth == 512 && EltWidth == 32)
3969 IID = Intrinsic::x86_avx512_pternlog_d_512;
3970 else if (VecWidth == 128 && EltWidth == 64)
3971 IID = Intrinsic::x86_avx512_pternlog_q_128;
3972 else if (VecWidth == 256 && EltWidth == 64)
3973 IID = Intrinsic::x86_avx512_pternlog_q_256;
3974 else if (VecWidth == 512 && EltWidth == 64)
3975 IID = Intrinsic::x86_avx512_pternlog_q_512;
3976 else
3977 llvm_unreachable("Unexpected intrinsic");
3978
3979 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3980 CI->getArgOperand(2), CI->getArgOperand(3) };
3981 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3982 Args);
3983 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3984 : CI->getArgOperand(0);
3985 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3986 } else if (IsX86 && (Name.starts_with("avx512.mask.vpmadd52") ||
3987 Name.starts_with("avx512.maskz.vpmadd52"))) {
3988 bool ZeroMask = Name[11] == 'z';
3989 bool High = Name[20] == 'h' || Name[21] == 'h';
3990 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3991 Intrinsic::ID IID;
3992 if (VecWidth == 128 && !High)
3993 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3994 else if (VecWidth == 256 && !High)
3995 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3996 else if (VecWidth == 512 && !High)
3997 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3998 else if (VecWidth == 128 && High)
3999 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4000 else if (VecWidth == 256 && High)
4001 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4002 else if (VecWidth == 512 && High)
4003 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4004 else
4005 llvm_unreachable("Unexpected intrinsic");
4006
4007 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
4008 CI->getArgOperand(2) };
4009 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4010 Args);
4011 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4012 : CI->getArgOperand(0);
4013 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4014 } else if (IsX86 && (Name.starts_with("avx512.mask.vpermi2var.") ||
4015 Name.starts_with("avx512.mask.vpermt2var.") ||
4016 Name.starts_with("avx512.maskz.vpermt2var."))) {
4017 bool ZeroMask = Name[11] == 'z';
4018 bool IndexForm = Name[17] == 'i';
4019 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4020 } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpbusd.") ||
4021 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4022 Name.starts_with("avx512.mask.vpdpbusds.") ||
4023 Name.starts_with("avx512.maskz.vpdpbusds."))) {
4024 bool ZeroMask = Name[11] == 'z';
4025 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4026 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4027 Intrinsic::ID IID;
4028 if (VecWidth == 128 && !IsSaturating)
4029 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4030 else if (VecWidth == 256 && !IsSaturating)
4031 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4032 else if (VecWidth == 512 && !IsSaturating)
4033 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4034 else if (VecWidth == 128 && IsSaturating)
4035 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4036 else if (VecWidth == 256 && IsSaturating)
4037 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4038 else if (VecWidth == 512 && IsSaturating)
4039 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4040 else
4041 llvm_unreachable("Unexpected intrinsic");
4042
4043 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4044 CI->getArgOperand(2) };
4045 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4046 Args);
4047 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4048 : CI->getArgOperand(0);
4049 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4050 } else if (IsX86 && (Name.starts_with("avx512.mask.vpdpwssd.") ||
4051 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4052 Name.starts_with("avx512.mask.vpdpwssds.") ||
4053 Name.starts_with("avx512.maskz.vpdpwssds."))) {
4054 bool ZeroMask = Name[11] == 'z';
4055 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4056 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4057 Intrinsic::ID IID;
4058 if (VecWidth == 128 && !IsSaturating)
4059 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4060 else if (VecWidth == 256 && !IsSaturating)
4061 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4062 else if (VecWidth == 512 && !IsSaturating)
4063 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4064 else if (VecWidth == 128 && IsSaturating)
4065 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4066 else if (VecWidth == 256 && IsSaturating)
4067 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4068 else if (VecWidth == 512 && IsSaturating)
4069 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4070 else
4071 llvm_unreachable("Unexpected intrinsic");
4072
4073 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4074 CI->getArgOperand(2) };
4075 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
4076 Args);
4077 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4078 : CI->getArgOperand(0);
4079 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4080 } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4081 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4082 Name == "subborrow.u32" || Name == "subborrow.u64")) {
4083 Intrinsic::ID IID;
4084 if (Name[0] == 'a' && Name.back() == '2')
4085 IID = Intrinsic::x86_addcarry_32;
4086 else if (Name[0] == 'a' && Name.back() == '4')
4087 IID = Intrinsic::x86_addcarry_64;
4088 else if (Name[0] == 's' && Name.back() == '2')
4089 IID = Intrinsic::x86_subborrow_32;
4090 else if (Name[0] == 's' && Name.back() == '4')
4091 IID = Intrinsic::x86_subborrow_64;
4092 else
4093 llvm_unreachable("Unexpected intrinsic");
4094
4095 // Make a call with 3 operands.
4096 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
4097 CI->getArgOperand(2)};
4098 Value *NewCall = Builder.CreateCall(
4099 Intrinsic::getDeclaration(CI->getModule(), IID),
4100 Args);
4101
4102 // Extract the second result and store it.
4103 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4104 // Cast the pointer to the right type.
4105 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
4106 llvm::PointerType::getUnqual(Data->getType()));
4107 Builder.CreateAlignedStore(Data, Ptr, Align(1));
4108 // Replace the original call result with the first result of the new call.
4109 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4110
4111 CI->replaceAllUsesWith(CF);
4112 Rep = nullptr;
4113 } else if (IsX86 && Name.starts_with("avx512.mask.") &&
4114 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4115 // Rep will be updated by the call in the condition.
4116 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
4117 Value *Arg = CI->getArgOperand(0);
4118 Value *Neg = Builder.CreateNeg(Arg, "neg");
4119 Value *Cmp = Builder.CreateICmpSGE(
4120 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
4121 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
4122 } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
4123 Name.starts_with("atomic.load.add.f64.p"))) {
4124 Value *Ptr = CI->getArgOperand(0);
4125 Value *Val = CI->getArgOperand(1);
4126 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
4127 AtomicOrdering::SequentiallyConsistent);
4128 } else if (IsNVVM && Name.consume_front("max.") &&
4129 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4130 Name == "ui" || Name == "ull")) {
4131 Value *Arg0 = CI->getArgOperand(0);
4132 Value *Arg1 = CI->getArgOperand(1);
4133 Value *Cmp = Name.starts_with("u")
4134 ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
4135 : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
4136 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
4137 } else if (IsNVVM && Name.consume_front("min.") &&
4138 (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
4139 Name == "ui" || Name == "ull")) {
4140 Value *Arg0 = CI->getArgOperand(0);
4141 Value *Arg1 = CI->getArgOperand(1);
4142 Value *Cmp = Name.starts_with("u")
4143 ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
4144 : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
4145 Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
4146 } else if (IsNVVM && Name == "clz.ll") {
4147 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
4148 Value *Arg = CI->getArgOperand(0);
4149 Value *Ctlz = Builder.CreateCall(
4150 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
4151 {Arg->getType()}),
4152 {Arg, Builder.getFalse()}, "ctlz");
4153 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
4154 } else if (IsNVVM && Name == "popc.ll") {
4155 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
4156 // i64.
4157 Value *Arg = CI->getArgOperand(0);
4158 Value *Popc = Builder.CreateCall(
4159 Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
4160 {Arg->getType()}),
4161 Arg, "ctpop");
4162 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
4163 } else if (IsNVVM) {
4164 if (Name == "h2f") {
4165 Rep =
4166 Builder.CreateCall(Intrinsic::getDeclaration(
4167 F->getParent(), Intrinsic::convert_from_fp16,
4168 {Builder.getFloatTy()}),
4169 CI->getArgOperand(0), "h2f");
4170 } else {
4171 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
4172 if (IID != Intrinsic::not_intrinsic &&
4173 !F->getReturnType()->getScalarType()->isBFloatTy()) {
4174 rename(F);
4175 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
4176 SmallVector<Value *, 2> Args;
4177 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
4178 Value *Arg = CI->getArgOperand(I);
4179 Type *OldType = Arg->getType();
4180 Type *NewType = NewFn->getArg(I)->getType();
4181 Args.push_back((OldType->isIntegerTy() &&
4182 NewType->getScalarType()->isBFloatTy())
4183 ? Builder.CreateBitCast(Arg, NewType)
4184 : Arg);
4185 }
4186 Rep = Builder.CreateCall(NewFn, Args);
4187 if (F->getReturnType()->isIntegerTy())
4188 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
4189 }
4190 }
4191 } else if (IsARM) {
4192 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4193 } else if (IsAMDGCN) {
4194 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4195 } else {
4196 llvm_unreachable("Unknown function for CallBase upgrade.");
4197 }
4198
4199 if (Rep)
4200 CI->replaceAllUsesWith(Rep);
4201 CI->eraseFromParent();
4202 return;
4203 }
4204
4205 const auto &DefaultCase = [&]() -> void {
4206 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4207 // Handle generic mangling change.
4208 assert(
4209 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4210 "Unknown function for CallBase upgrade and isn't just a name change");
4211 CI->setCalledFunction(NewFn);
4212 return;
4213 }
4214
4215 // This must be an upgrade from a named to a literal struct.
4216 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4217 assert(OldST != NewFn->getReturnType() &&
4218 "Return type must have changed");
4219 assert(OldST->getNumElements() ==
4220 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4221 "Must have same number of elements");
4222
4223 SmallVector<Value *> Args(CI->args());
4224 Value *NewCI = Builder.CreateCall(NewFn, Args);
4225 Value *Res = PoisonValue::get(OldST);
4226 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4227 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4228 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4229 }
4230 CI->replaceAllUsesWith(Res);
4231 CI->eraseFromParent();
4232 return;
4233 }
4234
4235 // We're probably about to produce something invalid. Let the verifier catch
4236 // it instead of dying here.
4237 CI->setCalledOperand(
4238 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4239 return;
4240 };
4241 CallInst *NewCall = nullptr;
4242 switch (NewFn->getIntrinsicID()) {
4243 default: {
4244 DefaultCase();
4245 return;
4246 }
4247 case Intrinsic::arm_neon_vst1:
4248 case Intrinsic::arm_neon_vst2:
4249 case Intrinsic::arm_neon_vst3:
4250 case Intrinsic::arm_neon_vst4:
4251 case Intrinsic::arm_neon_vst2lane:
4252 case Intrinsic::arm_neon_vst3lane:
4253 case Intrinsic::arm_neon_vst4lane: {
4254 SmallVector<Value *, 4> Args(CI->args());
4255 NewCall = Builder.CreateCall(NewFn, Args);
4256 break;
4257 }
4258 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4259 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4260 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4261 LLVMContext &Ctx = F->getParent()->getContext();
4262 SmallVector<Value *, 4> Args(CI->args());
4263 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4264 cast<ConstantInt>(Args[3])->getZExtValue());
4265 NewCall = Builder.CreateCall(NewFn, Args);
4266 break;
4267 }
4268 case Intrinsic::aarch64_sve_ld3_sret:
4269 case Intrinsic::aarch64_sve_ld4_sret:
4270 case Intrinsic::aarch64_sve_ld2_sret: {
4271 StringRef Name = F->getName();
4272 Name = Name.substr(5);
4273 unsigned N = StringSwitch<unsigned>(Name)
4274 .StartsWith("aarch64.sve.ld2", 2)
4275 .StartsWith("aarch64.sve.ld3", 3)
4276 .StartsWith("aarch64.sve.ld4", 4)
4277 .Default(0);
4278 ScalableVectorType *RetTy =
4279 dyn_cast<ScalableVectorType>(F->getReturnType());
4280 unsigned MinElts = RetTy->getMinNumElements() / N;
4281 SmallVector<Value *, 2> Args(CI->args());
4282 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4283 Value *Ret = llvm::PoisonValue::get(RetTy);
4284 for (unsigned I = 0; I < N; I++) {
4285 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4286 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4287 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
4288 }
4289 NewCall = dyn_cast<CallInst>(Ret);
4290 break;
4291 }
4292
4293 case Intrinsic::coro_end: {
4294 SmallVector<Value *, 3> Args(CI->args());
4295 Args.push_back(ConstantTokenNone::get(CI->getContext()));
4296 NewCall = Builder.CreateCall(NewFn, Args);
4297 break;
4298 }
4299
4300 case Intrinsic::vector_extract: {
4301 StringRef Name = F->getName();
4302 Name = Name.substr(5); // Strip llvm
4303 if (!Name.starts_with("aarch64.sve.tuple.get")) {
4304 DefaultCase();
4305 return;
4306 }
4307 ScalableVectorType *RetTy =
4308 dyn_cast<ScalableVectorType>(F->getReturnType());
4309 unsigned MinElts = RetTy->getMinNumElements();
4310 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4311 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4312 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
4313 break;
4314 }
4315
4316 case Intrinsic::vector_insert: {
4317 StringRef Name = F->getName();
4318 Name = Name.substr(5);
4319 if (!Name.starts_with("aarch64.sve.tuple")) {
4320 DefaultCase();
4321 return;
4322 }
4323 if (Name.starts_with("aarch64.sve.tuple.set")) {
4324 unsigned I = dyn_cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
4325 ScalableVectorType *Ty =
4326 dyn_cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
4327 Value *NewIdx =
4328 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
4329 NewCall = Builder.CreateCall(
4330 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
4331 break;
4332 }
4333 if (Name.starts_with("aarch64.sve.tuple.create")) {
4334 unsigned N = StringSwitch<unsigned>(Name)
4335 .StartsWith("aarch64.sve.tuple.create2", 2)
4336 .StartsWith("aarch64.sve.tuple.create3", 3)
4337 .StartsWith("aarch64.sve.tuple.create4", 4)
4338 .Default(0);
4339 assert(N > 1 && "Create is expected to be between 2-4");
4340 ScalableVectorType *RetTy =
4341 dyn_cast<ScalableVectorType>(F->getReturnType());
4342 Value *Ret = llvm::PoisonValue::get(RetTy);
4343 unsigned MinElts = RetTy->getMinNumElements() / N;
4344 for (unsigned I = 0; I < N; I++) {
4345 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
4346 Value *V = CI->getArgOperand(I);
4347 Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
4348 }
4349 NewCall = dyn_cast<CallInst>(Ret);
4350 }
4351 break;
4352 }
4353
4354 case Intrinsic::arm_neon_bfdot:
4355 case Intrinsic::arm_neon_bfmmla:
4356 case Intrinsic::arm_neon_bfmlalb:
4357 case Intrinsic::arm_neon_bfmlalt:
4358 case Intrinsic::aarch64_neon_bfdot:
4359 case Intrinsic::aarch64_neon_bfmmla:
4360 case Intrinsic::aarch64_neon_bfmlalb:
4361 case Intrinsic::aarch64_neon_bfmlalt: {
4362 SmallVector<Value *, 3> Args;
4363 assert(CI->arg_size() == 3 &&
4364 "Mismatch between function args and call args");
4365 size_t OperandWidth =
4366 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
4367 assert((OperandWidth == 64 || OperandWidth == 128) &&
4368 "Unexpected operand width");
4369 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
4370 auto Iter = CI->args().begin();
4371 Args.push_back(*Iter++);
4372 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4373 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
4374 NewCall = Builder.CreateCall(NewFn, Args);
4375 break;
4376 }
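// For illustration: the second and third operands previously used an integer
// vector type; they are bitcast to the matching bfloat vector, e.g. a 128-bit
// operand becomes <8 x bfloat> (OperandWidth / 16 elements).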
4377
4378 case Intrinsic::bitreverse:
4379 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4380 break;
4381
4382 case Intrinsic::ctlz:
4383 case Intrinsic::cttz:
4384 assert(CI->arg_size() == 1 &&
4385 "Mismatch between function args and call args");
4386 NewCall =
4387 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
4388 break;
4389
4390 case Intrinsic::objectsize: {
4391 Value *NullIsUnknownSize =
4392 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
4393 Value *Dynamic =
4394 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
4395 NewCall = Builder.CreateCall(
4396 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
4397 break;
4398 }
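// For illustration: a two-argument call such as
//   call i64 @llvm.objectsize.i64.p0(ptr %p, i1 false)
// gains "i1 false, i1 false" for the missing nullunknown and dynamic flags.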
4399
4400 case Intrinsic::ctpop:
4401 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4402 break;
4403
4404 case Intrinsic::convert_from_fp16:
4405 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
4406 break;
4407
4408 case Intrinsic::dbg_value: {
4409 StringRef Name = F->getName();
4410 Name = Name.substr(5); // Strip llvm.
4411 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
4412 if (Name.starts_with("dbg.addr")) {
4413 DIExpression *Expr = cast<DIExpression>(
4414 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
4415 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4416 NewCall =
4417 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4418 MetadataAsValue::get(C, Expr)});
4419 break;
4420 }
4421
4422 // Upgrade from the old version that had an extra offset argument.
4423 assert(CI->arg_size() == 4);
4424 // Drop nonzero offsets instead of attempting to upgrade them.
4425 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
4426 if (Offset->isZeroValue()) {
4427 NewCall = Builder.CreateCall(
4428 NewFn,
4429 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
4430 break;
4431 }
4432 CI->eraseFromParent();
4433 return;
4434 }
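// For illustration: call void @llvm.dbg.addr(metadata ptr %p, metadata !var,
// metadata !DIExpression()) is re-emitted as llvm.dbg.value with
// !DIExpression(DW_OP_deref), preserving the "value lives at the address in
// %p" semantics.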
4435
4436 case Intrinsic::ptr_annotation:
4437 // Upgrade from versions that lacked the annotation attribute argument.
4438 if (CI->arg_size() != 4) {
4439 DefaultCase();
4440 return;
4441 }
4442
4443 // Create a new call with an added null annotation attribute argument.
4444 NewCall =
4445 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4446 CI->getArgOperand(2), CI->getArgOperand(3),
4447 Constant::getNullValue(Builder.getPtrTy())});
4448 NewCall->takeName(CI);
4449 CI->replaceAllUsesWith(NewCall);
4450 CI->eraseFromParent();
4451 return;
4452
4453 case Intrinsic::var_annotation:
4454 // Upgrade from versions that lacked the annotation attribute argument.
4455 if (CI->arg_size() != 4) {
4456 DefaultCase();
4457 return;
4458 }
4459 // Create a new call with an added null annotation attribute argument.
4460 NewCall =
4461 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
4462 CI->getArgOperand(2), CI->getArgOperand(3),
4463 Constant::getNullValue(Builder.getPtrTy())});
4464 NewCall->takeName(CI);
4465 CI->replaceAllUsesWith(NewCall);
4466 CI->eraseFromParent();
4467 return;
4468
4469 case Intrinsic::riscv_aes32dsi:
4470 case Intrinsic::riscv_aes32dsmi:
4471 case Intrinsic::riscv_aes32esi:
4472 case Intrinsic::riscv_aes32esmi:
4473 case Intrinsic::riscv_sm4ks:
4474 case Intrinsic::riscv_sm4ed: {
4475 // The last argument to these intrinsics used to be i8 and changed to i32.
4476 // The type overload for sm4ks and sm4ed was removed.
4477 Value *Arg2 = CI->getArgOperand(2);
4478 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4479 return;
4480
4481 Value *Arg0 = CI->getArgOperand(0);
4482 Value *Arg1 = CI->getArgOperand(1);
4483 if (CI->getType()->isIntegerTy(64)) {
4484 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4485 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4486 }
4487
4488 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4489 cast<ConstantInt>(Arg2)->getZExtValue());
4490
4491 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4492 Value *Res = NewCall;
4493 if (Res->getType() != CI->getType())
4494 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4495 NewCall->takeName(CI);
4496 CI->replaceAllUsesWith(Res);
4497 CI->eraseFromParent();
4498 return;
4499 }
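// For illustration (RV64 sketch; values illustrative): the old i64 form
//   %r = call i64 @llvm.riscv.aes32esi(i64 %a, i64 %b, i8 3)
// is rewritten to truncate %a and %b to i32, call the i32 intrinsic with an
// i32 immediate, and sign-extend the i32 result back to i64.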
4500 case Intrinsic::riscv_sha256sig0:
4501 case Intrinsic::riscv_sha256sig1:
4502 case Intrinsic::riscv_sha256sum0:
4503 case Intrinsic::riscv_sha256sum1:
4504 case Intrinsic::riscv_sm3p0:
4505 case Intrinsic::riscv_sm3p1: {
4506 // These intrinsics used to be overloaded on XLen (i64 on RV64); the type
4507 // overload was removed and they now always operate on i32.
4508 if (!CI->getType()->isIntegerTy(64))
4509 return;
4510
4511 Value *Arg =
4512 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4513
4514 NewCall = Builder.CreateCall(NewFn, Arg);
4515 Value *Res =
4516 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4517 NewCall->takeName(CI);
4518 CI->replaceAllUsesWith(Res);
4519 CI->eraseFromParent();
4520 return;
4521 }
4522
4523 case Intrinsic::x86_xop_vfrcz_ss:
4524 case Intrinsic::x86_xop_vfrcz_sd:
4525 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4526 break;
4527
4528 case Intrinsic::x86_xop_vpermil2pd:
4529 case Intrinsic::x86_xop_vpermil2ps:
4530 case Intrinsic::x86_xop_vpermil2pd_256:
4531 case Intrinsic::x86_xop_vpermil2ps_256: {
4532 SmallVector<Value *, 4> Args(CI->args());
4533 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4534 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4535 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4536 NewCall = Builder.CreateCall(NewFn, Args);
4537 break;
4538 }
4539
4540 case Intrinsic::x86_sse41_ptestc:
4541 case Intrinsic::x86_sse41_ptestz:
4542 case Intrinsic::x86_sse41_ptestnzc: {
4543 // The arguments for these intrinsics used to be v4f32, and changed
4544 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4545 // So, the only thing required is a bitcast for both arguments.
4546 // First, check that the arguments have the old type.
4547 Value *Arg0 = CI->getArgOperand(0);
4548 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4549 return;
4550
4551 // Old intrinsic, add bitcasts
4552 Value *Arg1 = CI->getArgOperand(1);
4553
4554 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4555
4556 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4557 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4558
4559 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4560 break;
4561 }
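// For illustration: @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %b)
// becomes
//   %c0 = bitcast <4 x float> %a to <2 x i64>   (and likewise for %b)
// followed by the same ptest call on the <2 x i64> values.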
4562
4563 case Intrinsic::x86_rdtscp: {
4564 // This used to take one argument. If we have no arguments, it is already
4565 // upgraded.
4566 if (CI->getNumOperands() == 0)
4567 return;
4568
4569 NewCall = Builder.CreateCall(NewFn);
4570 // Extract the second result and store it.
4571 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4572 // Cast the pointer to the right type.
4573 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
4574 llvm::PointerType::getUnqual(Data->getType()));
4575 Builder.CreateAlignedStore(Data, Ptr, Align(1));
4576 // Replace the original call result with the first result of the new call.
4577 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
4578
4579 NewCall->takeName(CI);
4580 CI->replaceAllUsesWith(TSC);
4581 CI->eraseFromParent();
4582 return;
4583 }
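// For illustration (names illustrative): the old one-argument form
//   %tsc = call i64 @llvm.x86.rdtscp(ptr %aux)
// becomes a no-argument call returning { i64, i32 }; element 1 is stored
// through %aux and element 0 replaces the original i64 result.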
4584
4585 case Intrinsic::x86_sse41_insertps:
4586 case Intrinsic::x86_sse41_dppd:
4587 case Intrinsic::x86_sse41_dpps:
4588 case Intrinsic::x86_sse41_mpsadbw:
4589 case Intrinsic::x86_avx_dp_ps_256:
4590 case Intrinsic::x86_avx2_mpsadbw: {
4591 // Need to truncate the last argument from i32 to i8 -- this argument models
4592 // an inherently 8-bit immediate operand to these x86 instructions.
4593 SmallVector<Value *, 4> Args(CI->args());
4594
4595 // Replace the last argument with a trunc.
4596 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
4597 NewCall = Builder.CreateCall(NewFn, Args);
4598 break;
4599 }
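// For illustration: the i32 immediate in a call such as
//   @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %b, i32 16)
// is truncated to i8, matching the instruction's 8-bit immediate field.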
4600
4601 case Intrinsic::x86_avx512_mask_cmp_pd_128:
4602 case Intrinsic::x86_avx512_mask_cmp_pd_256:
4603 case Intrinsic::x86_avx512_mask_cmp_pd_512:
4604 case Intrinsic::x86_avx512_mask_cmp_ps_128:
4605 case Intrinsic::x86_avx512_mask_cmp_ps_256:
4606 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
4607 SmallVector<Value *, 4> Args(CI->args());
4608 unsigned NumElts =
4609 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
4610 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
4611
4612 NewCall = Builder.CreateCall(NewFn, Args);
4613 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
4614
4615 NewCall->takeName(CI);
4616 CI->replaceAllUsesWith(Res);
4617 CI->eraseFromParent();
4618 return;
4619 }
4620
4621 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
4622 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
4623 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
4624 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
4625 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
4626 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
4627 SmallVector<Value *, 4> Args(CI->args());
4628 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4629 if (NewFn->getIntrinsicID() ==
4630 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
4631 Args[1] = Builder.CreateBitCast(
4632 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4633
4634 NewCall = Builder.CreateCall(NewFn, Args);
4635 Value *Res = Builder.CreateBitCast(
4636 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
4637
4638 NewCall->takeName(CI);
4639 CI->replaceAllUsesWith(Res);
4640 CI->eraseFromParent();
4641 return;
4642 }
4643 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
4644 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
4645 case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
4646 SmallVector<Value *, 4> Args(CI->args());
4647 unsigned NumElts =
4648 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
4649 Args[1] = Builder.CreateBitCast(
4650 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4651 Args[2] = Builder.CreateBitCast(
4652 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
4653
4654 NewCall = Builder.CreateCall(NewFn, Args);
4655 break;
4656 }
4657
4658 case Intrinsic::thread_pointer: {
4659 NewCall = Builder.CreateCall(NewFn, {});
4660 break;
4661 }
4662
4663 case Intrinsic::memcpy:
4664 case Intrinsic::memmove:
4665 case Intrinsic::memset: {
4666 // We have to make sure that the call signature is what we're expecting.
4667 // We only want to change the old signatures by removing the alignment arg:
4668 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i32, i1)
4669 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i1)
4670 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
4671 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
4672 // Note: i8*'s in the above can be any pointer type
4673 if (CI->arg_size() != 5) {
4674 DefaultCase();
4675 return;
4676 }
4677 // Remove alignment argument (3), and add alignment attributes to the
4678 // dest/src pointers.
4679 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
4680 CI->getArgOperand(2), CI->getArgOperand(4)};
4681 NewCall = Builder.CreateCall(NewFn, Args);
4682 AttributeList OldAttrs = CI->getAttributes();
4683 AttributeList NewAttrs = AttributeList::get(
4684 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
4685 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
4686 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
4687 NewCall->setAttributes(NewAttrs);
4688 auto *MemCI = cast<MemIntrinsic>(NewCall);
4689 // All mem intrinsics support dest alignment.
4690 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
4691 MemCI->setDestAlignment(Align->getMaybeAlignValue());
4692 // Memcpy/Memmove also support source alignment.
4693 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
4694 MTI->setSourceAlignment(Align->getMaybeAlignValue());
4695 break;
4696 }
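// For illustration:
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 8, i1 false)
// becomes
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %d, i8* align 8 %s, i64 %n, i1 false)
// with the old alignment operand re-expressed as align attributes.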
4697 }
4698 assert(NewCall && "Should have either set this variable or returned through "
4699 "the default case");
4700 NewCall->takeName(CI);
4701 CI->replaceAllUsesWith(NewCall);
4702 CI->eraseFromParent();
4703}
4704
4705void llvm::UpgradeCallsToIntrinsic(Function *F) {
4706 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
4707
4708 // Check if this function should be upgraded and get the replacement function
4709 // if there is one.
4710 Function *NewFn;
4711 if (UpgradeIntrinsicFunction(F, NewFn)) {
4712 // Replace all users of the old function with the new function or new
4713 // instructions. This is not a range loop because the call is deleted.
4714 for (User *U : make_early_inc_range(F->users()))
4715 if (CallBase *CB = dyn_cast<CallBase>(U))
4716 UpgradeIntrinsicCall(CB, NewFn);
4717
4718 // Remove old function, no longer used, from the module.
4719 F->eraseFromParent();
4720 }
4721}
4722
4723MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
4724 const unsigned NumOperands = MD.getNumOperands();
4725 if (NumOperands == 0)
4726 return &MD; // Invalid, punt to a verifier error.
4727
4728 // Check if the tag uses struct-path aware TBAA format.
4729 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4730 return &MD;
4731
4732 auto &Context = MD.getContext();
4733 if (NumOperands == 3) {
4734 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4735 MDNode *ScalarType = MDNode::get(Context, Elts);
4736 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
4737 Metadata *Elts2[] = {ScalarType, ScalarType,
4738 ConstantAsMetadata::get(
4739 Constant::getNullValue(Type::getInt64Ty(Context))),
4740 MD.getOperand(2)};
4741 return MDNode::get(Context, Elts2);
4742 }
4743 // Create a MDNode <MD, MD, offset 0>
4744 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4745 Type::getInt64Ty(Context)))};
4746 return MDNode::get(Context, Elts);
4747}
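// For illustration: an old scalar TBAA node !1 = !{!"int", !0} becomes the
// struct-path access tag !{!1, !1, i64 0}; a three-operand node additionally
// carries its constness flag over as the fourth operand.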
4748
4749Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4750 Instruction *&Temp) {
4751 if (Opc != Instruction::BitCast)
4752 return nullptr;
4753
4754 Temp = nullptr;
4755 Type *SrcTy = V->getType();
4756 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4757 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4758 LLVMContext &Context = V->getContext();
4759
4760 // We have no information about target data layout, so we assume that
4761 // the maximum pointer size is 64 bits.
4762 Type *MidTy = Type::getInt64Ty(Context);
4763 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4764
4765 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4766 }
4767
4768 return nullptr;
4769}
4770
4771Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4772 if (Opc != Instruction::BitCast)
4773 return nullptr;
4774
4775 Type *SrcTy = C->getType();
4776 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4777 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4778 LLVMContext &Context = C->getContext();
4779
4780 // We have no information about target data layout, so we assume that
4781 // the maximum pointer size is 64 bits.
4782 Type *MidTy = Type::getInt64Ty(Context);
4783
4784 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4785 DestTy);
4786 }
4787
4788 return nullptr;
4789}
4790
4791/// Check the debug info version number; if it is out-dated, drop the debug
4792/// info. Return true if the module is modified.
4793bool llvm::UpgradeDebugInfo(Module &M) {
4794 if (DisableAutoUpgradeDebugInfo)
4795 return false;
4796
4797 unsigned Version = getDebugMetadataVersionFromModule(M);
4798 if (Version == DEBUG_METADATA_VERSION) {
4799 bool BrokenDebugInfo = false;
4800 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
4801 report_fatal_error("Broken module found, compilation aborted!");
4802 if (!BrokenDebugInfo)
4803 // Everything is ok.
4804 return false;
4805 else {
4806 // Diagnose malformed debug info.
4807 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
4808 M.getContext().diagnose(Diag);
4809 }
4810 }
4811 bool Modified = StripDebugInfo(M);
4812 if (Modified && Version != DEBUG_METADATA_VERSION) {
4813 // Diagnose a version mismatch.
4814 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
4815 M.getContext().diagnose(DiagVersion);
4816 }
4817 return Modified;
4818}
4819
4820/// This checks for the objc retain/release marker, which should be upgraded.
4821/// It returns true if the module is modified.
4822static bool upgradeRetainReleaseMarker(Module &M) {
4823 bool Changed = false;
4824 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
4825 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
4826 if (ModRetainReleaseMarker) {
4827 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
4828 if (Op) {
4829 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
4830 if (ID) {
4831 SmallVector<StringRef, 4> ValueComp;
4832 ID->getString().split(ValueComp, "#");
4833 if (ValueComp.size() == 2) {
4834 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
4835 ID = MDString::get(M.getContext(), NewValue);
4836 }
4837 M.addModuleFlag(Module::Error, MarkerKey, ID);
4838 M.eraseNamedMetadata(ModRetainReleaseMarker);
4839 Changed = true;
4840 }
4841 }
4842 }
4843 return Changed;
4844}
4845
4846void llvm::UpgradeARCRuntime(Module &M) {
4847 // This lambda converts normal function calls to ARC runtime functions to
4848 // intrinsic calls.
4849 auto UpgradeToIntrinsic = [&](const char *OldFunc,
4850 llvm::Intrinsic::ID IntrinsicFunc) {
4851 Function *Fn = M.getFunction(OldFunc);
4852
4853 if (!Fn)
4854 return;
4855
4856 Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
4857
4858 for (User *U : make_early_inc_range(Fn->users())) {
4859 CallInst *CI = dyn_cast<CallInst>(U);
4860 if (!CI || CI->getCalledFunction() != Fn)
4861 continue;
4862
4863 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
4864 FunctionType *NewFuncTy = NewFn->getFunctionType();
4865 SmallVector<Value *, 4> Args;
4866
4867 // Don't upgrade the intrinsic if it's not valid to bitcast the return
4868 // value to the return type of the old function.
4869 if (NewFuncTy->getReturnType() != CI->getType() &&
4870 !CastInst::castIsValid(Instruction::BitCast, CI,
4871 NewFuncTy->getReturnType()))
4872 continue;
4873
4874 bool InvalidCast = false;
4875
4876 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
4877 Value *Arg = CI->getArgOperand(I);
4878
4879 // Bitcast argument to the parameter type of the new function if it's
4880 // not a variadic argument.
4881 if (I < NewFuncTy->getNumParams()) {
4882 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
4883 // to the parameter type of the new function.
4884 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
4885 NewFuncTy->getParamType(I))) {
4886 InvalidCast = true;
4887 break;
4888 }
4889 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
4890 }
4891 Args.push_back(Arg);
4892 }
4893
4894 if (InvalidCast)
4895 continue;
4896
4897 // Create a call instruction that calls the new function.
4898 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
4899 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4900 NewCall->takeName(CI);
4901
4902 // Bitcast the return value back to the type of the old call.
4903 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
4904
4905 if (!CI->use_empty())
4906 CI->replaceAllUsesWith(NewRetVal);
4907 CI->eraseFromParent();
4908 }
4909
4910 if (Fn->use_empty())
4911 Fn->eraseFromParent();
4912 };
4913
4914 // Unconditionally convert a call to "clang.arc.use" to a call to
4915 // "llvm.objc.clang.arc.use".
4916 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
4917
4918 // Upgrade the retain/release marker. If there is no need to upgrade the
4919 // marker, the module is either already new enough to contain the new
4920 // intrinsics or it is not ARC; either way the runtime calls need no upgrade.
4921 if (!upgradeRetainReleaseMarker(M))
4922 return;
4923
4924 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
4925 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
4926 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
4927 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
4928 {"objc_autoreleaseReturnValue",
4929 llvm::Intrinsic::objc_autoreleaseReturnValue},
4930 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
4931 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
4932 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
4933 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
4934 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
4935 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
4936 {"objc_release", llvm::Intrinsic::objc_release},
4937 {"objc_retain", llvm::Intrinsic::objc_retain},
4938 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
4939 {"objc_retainAutoreleaseReturnValue",
4940 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
4941 {"objc_retainAutoreleasedReturnValue",
4942 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
4943 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
4944 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
4945 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
4946 {"objc_unsafeClaimAutoreleasedReturnValue",
4947 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
4948 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
4949 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
4950 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
4951 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
4952 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
4953 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
4954 {"objc_arc_annotation_topdown_bbstart",
4955 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
4956 {"objc_arc_annotation_topdown_bbend",
4957 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
4958 {"objc_arc_annotation_bottomup_bbstart",
4959 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
4960 {"objc_arc_annotation_bottomup_bbend",
4961 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
4962
4963 for (auto &I : RuntimeFuncs)
4964 UpgradeToIntrinsic(I.first, I.second);
4965}
4966
4967bool llvm::UpgradeModuleFlags(Module &M) {
4968 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
4969 if (!ModFlags)
4970 return false;
4971
4972 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
4973 bool HasSwiftVersionFlag = false;
4974 uint8_t SwiftMajorVersion, SwiftMinorVersion;
4975 uint32_t SwiftABIVersion;
4976 auto Int8Ty = Type::getInt8Ty(M.getContext());
4977 auto Int32Ty = Type::getInt32Ty(M.getContext());
4978
4979 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
4980 MDNode *Op = ModFlags->getOperand(I);
4981 if (Op->getNumOperands() != 3)
4982 continue;
4983 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
4984 if (!ID)
4985 continue;
4986 auto SetBehavior = [&](Module::ModFlagBehavior B) {
4987 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
4988 Type::getInt32Ty(M.getContext()), B)),
4989 MDString::get(M.getContext(), ID->getString()),
4990 Op->getOperand(2)};
4991 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
4992 Changed = true;
4993 };
4994
4995 if (ID->getString() == "Objective-C Image Info Version")
4996 HasObjCFlag = true;
4997 if (ID->getString() == "Objective-C Class Properties")
4998 HasClassProperties = true;
4999 // Upgrade PIC from Error/Max to Min.
5000 if (ID->getString() == "PIC Level") {
5001 if (auto *Behavior =
5002 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5003 uint64_t V = Behavior->getLimitedValue();
5004 if (V == Module::Error || V == Module::Max)
5005 SetBehavior(Module::Min);
5006 }
5007 }
5008 // Upgrade "PIE Level" from Error to Max.
5009 if (ID->getString() == "PIE Level")
5010 if (auto *Behavior =
5011 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5012 if (Behavior->getLimitedValue() == Module::Error)
5013 SetBehavior(Module::Max);
5014
5015 // Upgrade branch protection and return address signing module flags. The
5016 // module flag behavior for these fields was Error and is now Min.
5017 if (ID->getString() == "branch-target-enforcement" ||
5018 ID->getString().starts_with("sign-return-address")) {
5019 if (auto *Behavior =
5020 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5021 if (Behavior->getLimitedValue() == Module::Error) {
5022 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5023 Metadata *Ops[3] = {
5024 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5025 Op->getOperand(1), Op->getOperand(2)};
5026 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5027 Changed = true;
5028 }
5029 }
5030 }
5031
5032 // Upgrade the Objective-C Image Info Section. Remove the whitespace in the
5033 // section name so that llvm-lto will not complain about mismatching
5034 // module flags that are functionally the same.
5035 if (ID->getString() == "Objective-C Image Info Section") {
5036 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5037 SmallVector<StringRef, 4> ValueComp;
5038 Value->getString().split(ValueComp, " ");
5039 if (ValueComp.size() != 1) {
5040 std::string NewValue;
5041 for (auto &S : ValueComp)
5042 NewValue += S.str();
5043 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5044 MDString::get(M.getContext(), NewValue)};
5045 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5046 Changed = true;
5047 }
5048 }
5049 }
5050
5051 // The IR upgrader turns an i32-typed "Objective-C Garbage Collection" into an
5052 // i8 value. If the higher bits are set, it adds a new module flag for Swift info.
5053 if (ID->getString() == "Objective-C Garbage Collection") {
5054 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5055 if (Md) {
5056 assert(Md->getValue() && "Expected non-empty metadata");
5057 auto Type = Md->getValue()->getType();
5058 if (Type == Int8Ty)
5059 continue;
5060 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5061 if ((Val & 0xff) != Val) {
5062 HasSwiftVersionFlag = true;
5063 SwiftABIVersion = (Val & 0xff00) >> 8;
5064 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5065 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5066 }
5067 Metadata *Ops[3] = {
5068 Op->getOperand(0),
5069 Op->getOperand(1),
5070 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
5071 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5072 Changed = true;
5073 }
5074 }
5075
5076 if (ID->getString() == "amdgpu_code_object_version") {
5077 Metadata *Ops[3] = {
5078 Op->getOperand(0),
5079 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5080 Op->getOperand(2)};
5081 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5082 Changed = true;
5083 }
5084 }
5085
5086 // "Objective-C Class Properties" is recently added for Objective-C. We
5087 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
5088 // flag of value 0, so we can correclty downgrade this flag when trying to
5089 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5090 // this module flag.
5091 if (HasObjCFlag && !HasClassProperties) {
5092 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5093 (uint32_t)0);
5094 Changed = true;
5095 }
5096
5097 if (HasSwiftVersionFlag) {
5098 M.addModuleFlag(Module::Error, "Swift ABI Version",
5099 SwiftABIVersion);
5100 M.addModuleFlag(Module::Error, "Swift Major Version",
5101 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5102 M.addModuleFlag(Module::Error, "Swift Minor Version",
5103 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5104 Changed = true;
5105 }
5106
5107 return Changed;
5108}
5109
5110void llvm::UpgradeSectionAttributes(Module &M) {
5111 auto TrimSpaces = [](StringRef Section) -> std::string {
5112 SmallVector<StringRef, 5> Components;
5113 Section.split(Components, ',');
5114
5115 SmallString<32> Buffer;
5116 raw_svector_ostream OS(Buffer);
5117
5118 for (auto Component : Components)
5119 OS << ',' << Component.trim();
5120
5121 return std::string(OS.str().substr(1));
5122 };
5123
5124 for (auto &GV : M.globals()) {
5125 if (!GV.hasSection())
5126 continue;
5127
5128 StringRef Section = GV.getSection();
5129
5130 if (!Section.starts_with("__DATA, __objc_catlist"))
5131 continue;
5132
5133 // __DATA, __objc_catlist, regular, no_dead_strip
5134 // __DATA,__objc_catlist,regular,no_dead_strip
5135 GV.setSection(TrimSpaces(Section));
5136 }
5137}
5138
5139namespace {
5140// Prior to LLVM 10.0, the strictfp attribute could be used on individual
5141// callsites within a function that did not also have the strictfp attribute.
5142// Since 10.0, if strict FP semantics are needed within a function, the
5143// function must have the strictfp attribute and all calls within the function
5144// must also have the strictfp attribute. This latter restriction is
5145// necessary to prevent unwanted libcall simplification when a function is
5146// being cloned (such as for inlining).
5147//
5148// The "dangling" strictfp attribute usage was only used to prevent constant
5149// folding and other libcall simplification. The nobuiltin attribute on the
5150// callsite has the same effect.
5151struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5152 StrictFPUpgradeVisitor() = default;
5153
5154 void visitCallBase(CallBase &Call) {
5155 if (!Call.isStrictFP())
5156 return;
5157 if (isa<ConstrainedFPIntrinsic>(&Call))
5158 return;
5159 // If we get here, the caller doesn't have the strictfp attribute
5160 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5161 Call.removeFnAttr(Attribute::StrictFP);
5162 Call.addFnAttr(Attribute::NoBuiltin);
5163 }
5164};
5165} // namespace
5166
5167void llvm::UpgradeFunctionAttributes(Function &F) {
5168 // If a function definition doesn't have the strictfp attribute,
5169 // convert any callsite strictfp attributes to nobuiltin.
5170 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5171 StrictFPUpgradeVisitor SFPV;
5172 SFPV.visit(F);
5173 }
5174
5175 // Remove all incompatible attributes from the function.
5176 F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
5177 for (auto &Arg : F.args())
5178 Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
5179}
5180
5181static bool isOldLoopArgument(Metadata *MD) {
5182 auto *T = dyn_cast_or_null<MDTuple>(MD);
5183 if (!T)
5184 return false;
5185 if (T->getNumOperands() < 1)
5186 return false;
5187 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
5188 if (!S)
5189 return false;
5190 return S->getString().starts_with("llvm.vectorizer.");
5191}
5192
5193static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
5194 StringRef OldPrefix = "llvm.vectorizer.";
5195 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
5196
5197 if (OldTag == "llvm.vectorizer.unroll")
5198 return MDString::get(C, "llvm.loop.interleave.count");
5199
5200 return MDString::get(
5201 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
5202 .str());
5203}
5204
5205static Metadata *upgradeLoopArgument(Metadata *MD) {
5206 auto *T = dyn_cast_or_null<MDTuple>(MD);
5207 if (!T)
5208 return MD;
5209 if (T->getNumOperands() < 1)
5210 return MD;
5211 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
5212 if (!OldTag)
5213 return MD;
5214 if (!OldTag->getString().starts_with("llvm.vectorizer."))
5215 return MD;
5216
5217 // This has an old tag. Upgrade it.
5218 SmallVector<Metadata *, 8> Ops;
5219 Ops.reserve(T->getNumOperands());
5220 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
5221 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
5222 Ops.push_back(T->getOperand(I));
5223
5224 return MDTuple::get(T->getContext(), Ops);
5225}
5226
5227MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
5228 auto *T = dyn_cast<MDTuple>(&N);
5229 if (!T)
5230 return &N;
5231
5232 if (none_of(T->operands(), isOldLoopArgument))
5233 return &N;
5234
5236 Ops.reserve(T->getNumOperands());
5237 for (Metadata *MD : T->operands())
5239 Ops.push_back(upgradeLoopArgument(MD));
5240 return MDTuple::get(T->getContext(), Ops);
5241}
5242
5243std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
5244 Triple T(TT);
5245 // The only data layout upgrade needed for pre-GCN targets is setting the
5246 // address space of globals to 1.
5247 if (T.isAMDGPU() && !T.isAMDGCN() && !DL.contains("-G") &&
5248 !DL.starts_with("G")) {
5249 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
5250 }
5251
5252 if (T.isRISCV64()) {
5253 // Make i32 a native type for 64-bit RISC-V.
5254 auto I = DL.find("-n64-");
5255 if (I != StringRef::npos)
5256 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
5257 return DL.str();
5258 }
5259
5260 std::string Res = DL.str();
5261 // AMDGCN data layout upgrades.
5262 if (T.isAMDGCN()) {
5263 // Define address spaces for constants.
5264 if (!DL.contains("-G") && !DL.starts_with("G"))
5265 Res.append(Res.empty() ? "G1" : "-G1");
5266
5267 // Add missing non-integral declarations.
5268 // This goes before adding new address spaces to prevent incoherent string
5269 // values.
5270 if (!DL.contains("-ni") && !DL.starts_with("ni"))
5271 Res.append("-ni:7:8:9");
5272 // Update ni:7 to ni:7:8:9.
5273 if (DL.ends_with("ni:7"))
5274 Res.append(":8:9");
5275 if (DL.ends_with("ni:7:8"))
5276 Res.append(":9");
5277
5278 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
5279 // resources). An empty data layout has already been upgraded to G1 by now.
5280 if (!DL.contains("-p7") && !DL.starts_with("p7"))
5281 Res.append("-p7:160:256:256:32");
5282 if (!DL.contains("-p8") && !DL.starts_with("p8"))
5283 Res.append("-p8:128:128");
5284 if (!DL.contains("-p9") && !DL.starts_with("p9"))
5285 Res.append("-p9:192:256:256:32");
5286
5287 return Res;
5288 }
5289
5290 if (!T.isX86())
5291 return Res;
5292
5293 // If the datalayout matches the expected format, add pointer size address
5294 // spaces to the datalayout.
5295 std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
5296 if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) {
5297 SmallVector<StringRef, 4> Groups;
5298 Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
5299 if (R.match(Res, &Groups))
5300 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5301 }
5302
5303 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5304 // for i128 operations prior to this being reflected in the data layout, and
5305 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
5306 // boundaries, so although this is a breaking change, the upgrade is expected
5307 // to fix more IR than it breaks.
5308 // Intel MCU is an exception and uses 4-byte-alignment.
5309 if (!T.isOSIAMCU()) {
5310 std::string I128 = "-i128:128";
5311 if (StringRef Ref = Res; !Ref.contains(I128)) {
5312 SmallVector<StringRef, 4> Groups;
5313 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5314 if (R.match(Res, &Groups))
5315 Res = (Groups[1] + I128 + Groups[3]).str();
5316 }
5317 }
5318
5319 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5320 // Raising the alignment is safe because Clang did not produce f80 values in
5321 // the MSVC environment before this upgrade was added.
5322 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5323 StringRef Ref = Res;
5324 auto I = Ref.find("-f80:32-");
5325 if (I != StringRef::npos)
5326 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5327 }
5328
5329 return Res;
5330}
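// Worked example of the X86 path above (input string illustrative): the layout
//   "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
// becomes
//   "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
// once the address-space and i128-alignment rules have been applied.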
5331
5332void llvm::UpgradeAttributes(AttrBuilder &B) {
5333 StringRef FramePointer;
5334 Attribute A = B.getAttribute("no-frame-pointer-elim");
5335 if (A.isValid()) {
5336 // The value can be "true" or "false".
5337 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5338 B.removeAttribute("no-frame-pointer-elim");
5339 }
5340 if (B.contains("no-frame-pointer-elim-non-leaf")) {
5341 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5342 if (FramePointer != "all")
5343 FramePointer = "non-leaf";
5344 B.removeAttribute("no-frame-pointer-elim-non-leaf");
5345 }
5346 if (!FramePointer.empty())
5347 B.addAttribute("frame-pointer", FramePointer);
5348
5349 A = B.getAttribute("null-pointer-is-valid");
5350 if (A.isValid()) {
5351 // The value can be "true" or "false".
5352 bool NullPointerIsValid = A.getValueAsString() == "true";
5353 B.removeAttribute("null-pointer-is-valid");
5354 if (NullPointerIsValid)
5355 B.addAttribute(Attribute::NullPointerIsValid);
5356 }
5357}
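// For illustration: "no-frame-pointer-elim"="true" becomes
// "frame-pointer"="all"; "no-frame-pointer-elim-non-leaf" becomes
// "frame-pointer"="non-leaf" unless "all" was already chosen; and
// "null-pointer-is-valid"="true" becomes the enum attribute
// null_pointer_is_valid.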
5358
5359void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
5360 // clang.arc.attachedcall bundles are now required to have an operand.
5361 // If they don't, it's okay to drop them entirely: when there is an operand,
5362 // the "attachedcall" is meaningful and required, but without an operand,
5363 // it's just a marker NOP. Dropping it merely prevents an optimization.
5364 erase_if(Bundles, [&](OperandBundleDef &OBD) {
5365 return OBD.getTag() == "clang.arc.attachedcall" &&
5366 OBD.inputs().empty();
5367 });
5368}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
amdgpu AMDGPU Register Bank Select
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:88
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:72
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:99
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
Definition: AutoUpgrade.cpp:52
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
Definition: AutoUpgrade.cpp:56
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
@ Default
Definition: DwarfDebug.cpp:87
This file contains constants used for implementing Dwarf debug support.
uint64_t Addr
std::string Name
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define R2(n)
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
uint64_t High
IntegerType * Int32Ty
LLVMContext & Context
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Class to represent array types.
Definition: DerivedTypes.h:371
Type * getElementType() const
Definition: DerivedTypes.h:384
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
Definition: Instructions.h:881
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:760
@ FAdd
*p = old + v
Definition: Instructions.h:785
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:800
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:804
AttributeSet getFnAttrs() const
The function attributes are returned.
static AttributeList get(LLVMContext &C, ArrayRef< std::pair< unsigned, Attribute > > Attrs)
Create an AttributeList with the specified parameters in it.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1455
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1703
Value * getCalledOperand() const
Definition: InstrTypes.h:1696
void setAttributes(AttributeList A)
Set the parameter attributes for this call.
Definition: InstrTypes.h:1784
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1648
FunctionType * getFunctionType() const
Definition: InstrTypes.h:1561
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1639
void setCalledOperand(Value *V)
Definition: InstrTypes.h:1739
unsigned arg_size() const
Definition: InstrTypes.h:1646
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1780
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1742
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:965
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1663
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:528
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2126
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:2072
static Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2112
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:204
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:153
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1356
static ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
Definition: Constants.cpp:1499
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
DWARF expression.
static DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
This class represents an Operation in the Expression.
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
Class to represent function types.
Definition: DerivedTypes.h:103
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
Type * getReturnType() const
Definition: DerivedTypes.h:124
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:162
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:200
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:230
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition: Function.cpp:389
size_t arg_size() const
Definition: Function.h:846
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:205
Argument * getArg(unsigned i) const
Definition: Function.h:831
LinkageTypes getLinkage() const
Definition: GlobalValue.h:545
Type * getValueType() const
Definition: GlobalValue.h:296
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:455
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2006
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1715
Value * CreateFSub(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1554
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2455
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:505
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2506
Value * CreateFDiv(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1608
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Definition: IRBuilder.h:1039
Value * CreateSIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2077
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2443
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:533
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1806
Value * CreateFAdd(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1527
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2153
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1214
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2499
CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:578
Value * CreateICmpSGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2252
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2022
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:520
ConstantInt * getInt8(uint8_t C)
Get a constant 8-bit value.
Definition: IRBuilder.h:470
Value * CreateUIToFP(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2070
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Definition: IRBuilder.h:515
Value * CreateICmpSLE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2260
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1748
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2224
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1338
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2110
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1789
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1410
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2010
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2477
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1469
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:598
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1321
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:465
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
Definition: IRBuilder.h:2532
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1853
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1491
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:563
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2236
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2179
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1825
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2395
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1450
Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2093
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1513
Value * CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2244
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2334
Value * CreateFMul(Value *L, Value *R, const Twine &Name="", MDNode *FPMD=nullptr)
Definition: IRBuilder.h:1581
Value * CreateFNeg(Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1729
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:510
Type * getBFloatTy()
Fetch the type representing a 16-bit brain floating point value.
Definition: IRBuilder.h:543
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1355
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitCallBase(CallBase &I)
Definition: InstVisitor.h:267
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:80
const BasicBlock * getParent() const
Definition: Instruction.h:151
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1633
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:184
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1541
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
LLVMContext & getContext() const
Definition: Metadata.h:1231
A single uniqued string.
Definition: Metadata.h:720
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:597
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1498
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:103
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition: Module.h:115
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition: Module.h:136
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition: Module.h:118
@ Min
Takes the min of the two values, which are required to be integers.
Definition: Module.h:150
@ Max
Takes the max of the two values, which are required to be integers.
Definition: Module.h:147
A tuple of MDNodes.
Definition: Metadata.h:1729
void setOperand(unsigned I, MDNode *New)
Definition: Metadata.cpp:1387
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1379
unsigned getNumOperands() const
Definition: Metadata.cpp:1375
A container for an operand bundle being viewed as a set of values rather than a set of uses.
Definition: InstrTypes.h:1408
ArrayRef< InputTy > inputs() const
Definition: InstrTypes.h:1423
StringRef getTag() const
Definition: InstrTypes.h:1431
Class to represent pointers.
Definition: DerivedTypes.h:646
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition: Regex.cpp:83
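A minimal usage sketch (the pattern and helper are invented): when the regex has a capture group, Matches[0] holds the whole match and Matches[1] the first capture.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Regex.h"
using namespace llvm;

// Hypothetical helper: extract the lane count from a name like "v4i32".
static bool matchLanes(StringRef Name, StringRef &Lanes) {
  Regex R("^v([0-9]+)i32$");
  SmallVector<StringRef, 2> Matches;
  if (!R.match(Name, &Matches))
    return false;
  Lanes = Matches[1]; // Matches[0] is the whole match
  return true;
}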
Class to represent scalable SIMD vectors.
Definition: DerivedTypes.h:586
uint64_t getMinNumElements() const
Get the minimum number of elements in this vector.
Definition: DerivedTypes.h:634
ArrayRef< int > getShuffleMask() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
size_t size() const
Definition: SmallVector.h:91
void reserve(size_type N)
Definition: SmallVector.h:676
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:605
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
static constexpr size_t npos
Definition: StringRef.h:52
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & StartsWith(StringLiteral S, T Value)
Definition: StringSwitch.h:83
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
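These chain into a single expression, as in this sketch (the enum and strings are made up): Case matches exactly, Cases accepts alternatives, StartsWith matches a prefix, and Default supplies the fallback.

#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

enum class Kind { Load, Store, Mem, Unknown };

static Kind classify(StringRef S) {
  return StringSwitch<Kind>(S)
      .Case("load", Kind::Load)
      .Cases("store", "st", Kind::Store)
      .StartsWith("mem", Kind::Mem)
      .Default(Kind::Unknown);
}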
Class to represent struct types.
Definition: DerivedTypes.h:216
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getElementType(unsigned N) const
Definition: DerivedTypes.h:342
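A short sketch of the literal-struct workflow (the element choice is arbitrary): create the type with StructType::get, then walk its fields with getNumElements and getElementType.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Build the literal type { i32, float } and print each field type.
static void dumpFields(LLVMContext &Ctx) {
  StructType *ST =
      StructType::get(Ctx, {Type::getInt32Ty(Ctx), Type::getFloatTy(Ctx)});
  for (unsigned I = 0, E = ST->getNumElements(); I != E; ++I)
    ST->getElementType(I)->print(errs());
}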
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getBFloatTy(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:146
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:262
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:216
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
bool hasName() const
Definition: Value.h:261
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
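takeName, replaceAllUsesWith, and eraseFromParent form the usual replacement pattern in upgrade-style rewrites; a sketch (the helper name is invented):

#include "llvm/IR/Instruction.h"
using namespace llvm;

// Swap New in for Old: keep the old name, redirect all uses, delete Old.
static void replaceInst(Instruction *Old, Instruction *New) {
  New->takeName(Old);           // transfer the name from Old
  Old->replaceAllUsesWith(New); // redirect every use to New
  Old->eraseFromParent();       // unlink and delete the dead instruction
}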
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
self_iterator getIterator()
Definition: ilist_node.h:109
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:690
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
AttributeMask typeIncompatible(Type *Ty, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Decode the IIT table entries for the specified intrinsic into an array of IITDescriptors.
Definition: Function.cpp:1295
std::optional< Function * > remangleIntrinsicFunction(Function *F)
Definition: Function.cpp:1744
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1017
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1451
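For an overloaded intrinsic, the concrete types go in Tys; a brief sketch (choosing llvm.umax over i32 purely as an example):

#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Get (or create) the declaration of @llvm.umax.i32 in module M.
static Function *getUMaxI32(Module *M) {
  Type *I32 = Type::getInt32Ty(M->getContext());
  return Intrinsic::getDeclaration(M, Intrinsic::umax, {I32});
}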
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1689
void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade the comment in a call to inline asm that represents an ObjC retain/release marker.
bool isValidAtomicOrdering(Int I)
bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn)
This is a more granular function that simply checks an intrinsic function for upgrading,...
MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
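This is the idiom that lets rewriting code erase elements while iterating a use list; a sketch (the function is hypothetical): because the iterator is advanced before the current element is handed out, erasing that element does not invalidate the walk.

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Erase every direct call to F while walking F's users.
static void eraseCallsTo(Function &F) {
  for (User *U : make_early_inc_range(F.users()))
    if (auto *CI = dyn_cast<CallInst>(U))
      if (CI->getCalledFunction() == &F)
        CI->eraseFromParent();
}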
bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
Op::Description Desc
void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1745
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
unsigned getDebugMetadataVersionFromModule(const Module &M)
Return Debug Info Metadata Version by checking module flags.
Definition: DebugInfo.cpp:922
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
Definition: DebugInfo.cpp:588
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
@ Dynamic
Denotes mode unknown at compile time.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2060
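Like none_of above, erase_if is a range wrapper over its std counterpart; a trivial sketch over a std::vector:

#include "llvm/ADT/STLExtras.h"
#include <cassert>
#include <vector>

// Remove the even numbers in one pass; none_of then confirms none remain.
static void dropEvens(std::vector<int> &V) {
  llvm::erase_if(V, [](int X) { return X % 2 == 0; });
  assert(llvm::none_of(V, [](int X) { return X % 2 == 0; }));
}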
bool UpgradeDebugInfo(Module &M)
Check the debug info version number; if it is outdated, drop the debug info.
void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ DEBUG_METADATA_VERSION
Definition: Metadata.h:52
bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
Definition: Verifier.cpp:6977
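A small sketch of checking a module after a transformation (the wrapper is hypothetical): verifyModule returns true when the module is broken, writing diagnostics to the stream if one is supplied, and reports debug-info breakage separately through the out-parameter.

#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Return true iff M is well-formed, printing any problems to stderr.
static bool isModuleOK(Module &M) {
  bool BrokenDebugInfo = false;
  if (verifyModule(M, &errs(), &BrokenDebugInfo))
    return false; // structurally invalid IR
  return !BrokenDebugInfo;
}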
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117