LLVM 20.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
14
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Regex.h"
#include <array>
#include <cstring>
45
46using namespace llvm;
47
48static cl::opt<bool>
49 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
50 cl::desc("Disable autoupgrade of debug info"));
51
52static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
53
54// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
55// changed their type from v4f32 to v2i64.
57 Function *&NewFn) {
58 // Check whether this is an old version of the function, which received
59 // v4f32 arguments.
60 Type *Arg0Type = F->getFunctionType()->getParamType(0);
61 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
62 return false;
63
64 // Yes, it's old, replace it with new version.
65 rename(F);
66 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
67 return true;
68}
69
70// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
71// arguments have changed their type from i32 to i8.
73 Function *&NewFn) {
74 // Check that the last argument is an i32.
75 Type *LastArgType = F->getFunctionType()->getParamType(
76 F->getFunctionType()->getNumParams() - 1);
77 if (!LastArgType->isIntegerTy(32))
78 return false;
79
80 // Move this function aside and map down.
81 rename(F);
82 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
83 return true;
84}
85
86// Upgrade the declaration of fp compare intrinsics that change return type
87// from scalar to vXi1 mask.
89 Function *&NewFn) {
90 // Check if the return type is a vector.
91 if (F->getReturnType()->isVectorTy())
92 return false;
93
94 rename(F);
95 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
96 return true;
97}
98
100 Function *&NewFn) {
101 if (F->getReturnType()->getScalarType()->isBFloatTy())
102 return false;
103
104 rename(F);
105 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
106 return true;
107}
108
110 Function *&NewFn) {
111 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
112 return false;
113
114 rename(F);
115 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
116 return true;
117}
118
120 // All of the intrinsics matches below should be marked with which llvm
121 // version started autoupgrading them. At some point in the future we would
122 // like to use this information to remove upgrade code for some older
123 // intrinsics. It is currently undecided how we will determine that future
124 // point.
125 if (Name.consume_front("avx."))
126 return (Name.starts_with("blend.p") || // Added in 3.7
127 Name == "cvt.ps2.pd.256" || // Added in 3.9
128 Name == "cvtdq2.pd.256" || // Added in 3.9
129 Name == "cvtdq2.ps.256" || // Added in 7.0
130 Name.starts_with("movnt.") || // Added in 3.2
131 Name.starts_with("sqrt.p") || // Added in 7.0
132 Name.starts_with("storeu.") || // Added in 3.9
133 Name.starts_with("vbroadcast.s") || // Added in 3.5
134 Name.starts_with("vbroadcastf128") || // Added in 4.0
135 Name.starts_with("vextractf128.") || // Added in 3.7
136 Name.starts_with("vinsertf128.") || // Added in 3.7
137 Name.starts_with("vperm2f128.") || // Added in 6.0
138 Name.starts_with("vpermil.")); // Added in 3.1
139
140 if (Name.consume_front("avx2."))
141 return (Name == "movntdqa" || // Added in 5.0
142 Name.starts_with("pabs.") || // Added in 6.0
143 Name.starts_with("padds.") || // Added in 8.0
144 Name.starts_with("paddus.") || // Added in 8.0
145 Name.starts_with("pblendd.") || // Added in 3.7
146 Name == "pblendw" || // Added in 3.7
147 Name.starts_with("pbroadcast") || // Added in 3.8
148 Name.starts_with("pcmpeq.") || // Added in 3.1
149 Name.starts_with("pcmpgt.") || // Added in 3.1
150 Name.starts_with("pmax") || // Added in 3.9
151 Name.starts_with("pmin") || // Added in 3.9
152 Name.starts_with("pmovsx") || // Added in 3.9
153 Name.starts_with("pmovzx") || // Added in 3.9
154 Name == "pmul.dq" || // Added in 7.0
155 Name == "pmulu.dq" || // Added in 7.0
156 Name.starts_with("psll.dq") || // Added in 3.7
157 Name.starts_with("psrl.dq") || // Added in 3.7
158 Name.starts_with("psubs.") || // Added in 8.0
159 Name.starts_with("psubus.") || // Added in 8.0
160 Name.starts_with("vbroadcast") || // Added in 3.8
161 Name == "vbroadcasti128" || // Added in 3.7
162 Name == "vextracti128" || // Added in 3.7
163 Name == "vinserti128" || // Added in 3.7
164 Name == "vperm2i128"); // Added in 6.0
165
166 if (Name.consume_front("avx512.")) {
167 if (Name.consume_front("mask."))
168 // 'avx512.mask.*'
169 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
170 Name.starts_with("and.") || // Added in 3.9
171 Name.starts_with("andn.") || // Added in 3.9
172 Name.starts_with("broadcast.s") || // Added in 3.9
173 Name.starts_with("broadcastf32x4.") || // Added in 6.0
174 Name.starts_with("broadcastf32x8.") || // Added in 6.0
175 Name.starts_with("broadcastf64x2.") || // Added in 6.0
176 Name.starts_with("broadcastf64x4.") || // Added in 6.0
177 Name.starts_with("broadcasti32x4.") || // Added in 6.0
178 Name.starts_with("broadcasti32x8.") || // Added in 6.0
179 Name.starts_with("broadcasti64x2.") || // Added in 6.0
180 Name.starts_with("broadcasti64x4.") || // Added in 6.0
181 Name.starts_with("cmp.b") || // Added in 5.0
182 Name.starts_with("cmp.d") || // Added in 5.0
183 Name.starts_with("cmp.q") || // Added in 5.0
184 Name.starts_with("cmp.w") || // Added in 5.0
185 Name.starts_with("compress.b") || // Added in 9.0
186 Name.starts_with("compress.d") || // Added in 9.0
187 Name.starts_with("compress.p") || // Added in 9.0
188 Name.starts_with("compress.q") || // Added in 9.0
189 Name.starts_with("compress.store.") || // Added in 7.0
190 Name.starts_with("compress.w") || // Added in 9.0
191 Name.starts_with("conflict.") || // Added in 9.0
192 Name.starts_with("cvtdq2pd.") || // Added in 4.0
193 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
194 Name == "cvtpd2dq.256" || // Added in 7.0
195 Name == "cvtpd2ps.256" || // Added in 7.0
196 Name == "cvtps2pd.128" || // Added in 7.0
197 Name == "cvtps2pd.256" || // Added in 7.0
198 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
199 Name == "cvtqq2ps.256" || // Added in 9.0
200 Name == "cvtqq2ps.512" || // Added in 9.0
201 Name == "cvttpd2dq.256" || // Added in 7.0
202 Name == "cvttps2dq.128" || // Added in 7.0
203 Name == "cvttps2dq.256" || // Added in 7.0
204 Name.starts_with("cvtudq2pd.") || // Added in 4.0
205 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
206 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
207 Name == "cvtuqq2ps.256" || // Added in 9.0
208 Name == "cvtuqq2ps.512" || // Added in 9.0
209 Name.starts_with("dbpsadbw.") || // Added in 7.0
210 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
211 Name.starts_with("expand.b") || // Added in 9.0
212 Name.starts_with("expand.d") || // Added in 9.0
213 Name.starts_with("expand.load.") || // Added in 7.0
214 Name.starts_with("expand.p") || // Added in 9.0
215 Name.starts_with("expand.q") || // Added in 9.0
216 Name.starts_with("expand.w") || // Added in 9.0
217 Name.starts_with("fpclass.p") || // Added in 7.0
218 Name.starts_with("insert") || // Added in 4.0
219 Name.starts_with("load.") || // Added in 3.9
220 Name.starts_with("loadu.") || // Added in 3.9
221 Name.starts_with("lzcnt.") || // Added in 5.0
222 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
223 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
224 Name.starts_with("movddup") || // Added in 3.9
225 Name.starts_with("move.s") || // Added in 4.0
226 Name.starts_with("movshdup") || // Added in 3.9
227 Name.starts_with("movsldup") || // Added in 3.9
228 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
229 Name.starts_with("or.") || // Added in 3.9
230 Name.starts_with("pabs.") || // Added in 6.0
231 Name.starts_with("packssdw.") || // Added in 5.0
232 Name.starts_with("packsswb.") || // Added in 5.0
233 Name.starts_with("packusdw.") || // Added in 5.0
234 Name.starts_with("packuswb.") || // Added in 5.0
235 Name.starts_with("padd.") || // Added in 4.0
236 Name.starts_with("padds.") || // Added in 8.0
237 Name.starts_with("paddus.") || // Added in 8.0
238 Name.starts_with("palignr.") || // Added in 3.9
239 Name.starts_with("pand.") || // Added in 3.9
240 Name.starts_with("pandn.") || // Added in 3.9
241 Name.starts_with("pavg") || // Added in 6.0
242 Name.starts_with("pbroadcast") || // Added in 6.0
243 Name.starts_with("pcmpeq.") || // Added in 3.9
244 Name.starts_with("pcmpgt.") || // Added in 3.9
245 Name.starts_with("perm.df.") || // Added in 3.9
246 Name.starts_with("perm.di.") || // Added in 3.9
247 Name.starts_with("permvar.") || // Added in 7.0
248 Name.starts_with("pmaddubs.w.") || // Added in 7.0
249 Name.starts_with("pmaddw.d.") || // Added in 7.0
250 Name.starts_with("pmax") || // Added in 4.0
251 Name.starts_with("pmin") || // Added in 4.0
252 Name == "pmov.qd.256" || // Added in 9.0
253 Name == "pmov.qd.512" || // Added in 9.0
254 Name == "pmov.wb.256" || // Added in 9.0
255 Name == "pmov.wb.512" || // Added in 9.0
256 Name.starts_with("pmovsx") || // Added in 4.0
257 Name.starts_with("pmovzx") || // Added in 4.0
258 Name.starts_with("pmul.dq.") || // Added in 4.0
259 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
260 Name.starts_with("pmulh.w.") || // Added in 7.0
261 Name.starts_with("pmulhu.w.") || // Added in 7.0
262 Name.starts_with("pmull.") || // Added in 4.0
263 Name.starts_with("pmultishift.qb.") || // Added in 8.0
264 Name.starts_with("pmulu.dq.") || // Added in 4.0
265 Name.starts_with("por.") || // Added in 3.9
266 Name.starts_with("prol.") || // Added in 8.0
267 Name.starts_with("prolv.") || // Added in 8.0
268 Name.starts_with("pror.") || // Added in 8.0
269 Name.starts_with("prorv.") || // Added in 8.0
270 Name.starts_with("pshuf.b.") || // Added in 4.0
271 Name.starts_with("pshuf.d.") || // Added in 3.9
272 Name.starts_with("pshufh.w.") || // Added in 3.9
273 Name.starts_with("pshufl.w.") || // Added in 3.9
274 Name.starts_with("psll.d") || // Added in 4.0
275 Name.starts_with("psll.q") || // Added in 4.0
276 Name.starts_with("psll.w") || // Added in 4.0
277 Name.starts_with("pslli") || // Added in 4.0
278 Name.starts_with("psllv") || // Added in 4.0
279 Name.starts_with("psra.d") || // Added in 4.0
280 Name.starts_with("psra.q") || // Added in 4.0
281 Name.starts_with("psra.w") || // Added in 4.0
282 Name.starts_with("psrai") || // Added in 4.0
283 Name.starts_with("psrav") || // Added in 4.0
284 Name.starts_with("psrl.d") || // Added in 4.0
285 Name.starts_with("psrl.q") || // Added in 4.0
286 Name.starts_with("psrl.w") || // Added in 4.0
287 Name.starts_with("psrli") || // Added in 4.0
288 Name.starts_with("psrlv") || // Added in 4.0
289 Name.starts_with("psub.") || // Added in 4.0
290 Name.starts_with("psubs.") || // Added in 8.0
291 Name.starts_with("psubus.") || // Added in 8.0
292 Name.starts_with("pternlog.") || // Added in 7.0
293 Name.starts_with("punpckh") || // Added in 3.9
294 Name.starts_with("punpckl") || // Added in 3.9
295 Name.starts_with("pxor.") || // Added in 3.9
296 Name.starts_with("shuf.f") || // Added in 6.0
297 Name.starts_with("shuf.i") || // Added in 6.0
298 Name.starts_with("shuf.p") || // Added in 4.0
299 Name.starts_with("sqrt.p") || // Added in 7.0
300 Name.starts_with("store.b.") || // Added in 3.9
301 Name.starts_with("store.d.") || // Added in 3.9
302 Name.starts_with("store.p") || // Added in 3.9
303 Name.starts_with("store.q.") || // Added in 3.9
304 Name.starts_with("store.w.") || // Added in 3.9
305 Name == "store.ss" || // Added in 7.0
306 Name.starts_with("storeu.") || // Added in 3.9
307 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
308 Name.starts_with("ucmp.") || // Added in 5.0
309 Name.starts_with("unpckh.") || // Added in 3.9
310 Name.starts_with("unpckl.") || // Added in 3.9
311 Name.starts_with("valign.") || // Added in 4.0
312 Name == "vcvtph2ps.128" || // Added in 11.0
313 Name == "vcvtph2ps.256" || // Added in 11.0
314 Name.starts_with("vextract") || // Added in 4.0
315 Name.starts_with("vfmadd.") || // Added in 7.0
316 Name.starts_with("vfmaddsub.") || // Added in 7.0
317 Name.starts_with("vfnmadd.") || // Added in 7.0
318 Name.starts_with("vfnmsub.") || // Added in 7.0
319 Name.starts_with("vpdpbusd.") || // Added in 7.0
320 Name.starts_with("vpdpbusds.") || // Added in 7.0
321 Name.starts_with("vpdpwssd.") || // Added in 7.0
322 Name.starts_with("vpdpwssds.") || // Added in 7.0
323 Name.starts_with("vpermi2var.") || // Added in 7.0
324 Name.starts_with("vpermil.p") || // Added in 3.9
325 Name.starts_with("vpermilvar.") || // Added in 4.0
326 Name.starts_with("vpermt2var.") || // Added in 7.0
327 Name.starts_with("vpmadd52") || // Added in 7.0
328 Name.starts_with("vpshld.") || // Added in 7.0
329 Name.starts_with("vpshldv.") || // Added in 8.0
330 Name.starts_with("vpshrd.") || // Added in 7.0
331 Name.starts_with("vpshrdv.") || // Added in 8.0
332 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
333 Name.starts_with("xor.")); // Added in 3.9
334
335 if (Name.consume_front("mask3."))
336 // 'avx512.mask3.*'
337 return (Name.starts_with("vfmadd.") || // Added in 7.0
338 Name.starts_with("vfmaddsub.") || // Added in 7.0
339 Name.starts_with("vfmsub.") || // Added in 7.0
340 Name.starts_with("vfmsubadd.") || // Added in 7.0
341 Name.starts_with("vfnmsub.")); // Added in 7.0
342
343 if (Name.consume_front("maskz."))
344 // 'avx512.maskz.*'
345 return (Name.starts_with("pternlog.") || // Added in 7.0
346 Name.starts_with("vfmadd.") || // Added in 7.0
347 Name.starts_with("vfmaddsub.") || // Added in 7.0
348 Name.starts_with("vpdpbusd.") || // Added in 7.0
349 Name.starts_with("vpdpbusds.") || // Added in 7.0
350 Name.starts_with("vpdpwssd.") || // Added in 7.0
351 Name.starts_with("vpdpwssds.") || // Added in 7.0
352 Name.starts_with("vpermt2var.") || // Added in 7.0
353 Name.starts_with("vpmadd52") || // Added in 7.0
354 Name.starts_with("vpshldv.") || // Added in 8.0
355 Name.starts_with("vpshrdv.")); // Added in 8.0
356
357 // 'avx512.*'
358 return (Name == "movntdqa" || // Added in 5.0
359 Name == "pmul.dq.512" || // Added in 7.0
360 Name == "pmulu.dq.512" || // Added in 7.0
361 Name.starts_with("broadcastm") || // Added in 6.0
362 Name.starts_with("cmp.p") || // Added in 12.0
363 Name.starts_with("cvtb2mask.") || // Added in 7.0
364 Name.starts_with("cvtd2mask.") || // Added in 7.0
365 Name.starts_with("cvtmask2") || // Added in 5.0
366 Name.starts_with("cvtq2mask.") || // Added in 7.0
367 Name == "cvtusi2sd" || // Added in 7.0
368 Name.starts_with("cvtw2mask.") || // Added in 7.0
369 Name == "kand.w" || // Added in 7.0
370 Name == "kandn.w" || // Added in 7.0
371 Name == "knot.w" || // Added in 7.0
372 Name == "kor.w" || // Added in 7.0
373 Name == "kortestc.w" || // Added in 7.0
374 Name == "kortestz.w" || // Added in 7.0
375 Name.starts_with("kunpck") || // added in 6.0
376 Name == "kxnor.w" || // Added in 7.0
377 Name == "kxor.w" || // Added in 7.0
378 Name.starts_with("padds.") || // Added in 8.0
379 Name.starts_with("pbroadcast") || // Added in 3.9
380 Name.starts_with("prol") || // Added in 8.0
381 Name.starts_with("pror") || // Added in 8.0
382 Name.starts_with("psll.dq") || // Added in 3.9
383 Name.starts_with("psrl.dq") || // Added in 3.9
384 Name.starts_with("psubs.") || // Added in 8.0
385 Name.starts_with("ptestm") || // Added in 6.0
386 Name.starts_with("ptestnm") || // Added in 6.0
387 Name.starts_with("storent.") || // Added in 3.9
388 Name.starts_with("vbroadcast.s") || // Added in 7.0
389 Name.starts_with("vpshld.") || // Added in 8.0
390 Name.starts_with("vpshrd.")); // Added in 8.0
391 }
392
393 if (Name.consume_front("fma."))
394 return (Name.starts_with("vfmadd.") || // Added in 7.0
395 Name.starts_with("vfmsub.") || // Added in 7.0
396 Name.starts_with("vfmsubadd.") || // Added in 7.0
397 Name.starts_with("vfnmadd.") || // Added in 7.0
398 Name.starts_with("vfnmsub.")); // Added in 7.0
399
400 if (Name.consume_front("fma4."))
401 return Name.starts_with("vfmadd.s"); // Added in 7.0
402
403 if (Name.consume_front("sse."))
404 return (Name == "add.ss" || // Added in 4.0
405 Name == "cvtsi2ss" || // Added in 7.0
406 Name == "cvtsi642ss" || // Added in 7.0
407 Name == "div.ss" || // Added in 4.0
408 Name == "mul.ss" || // Added in 4.0
409 Name.starts_with("sqrt.p") || // Added in 7.0
410 Name == "sqrt.ss" || // Added in 7.0
411 Name.starts_with("storeu.") || // Added in 3.9
412 Name == "sub.ss"); // Added in 4.0
413
414 if (Name.consume_front("sse2."))
415 return (Name == "add.sd" || // Added in 4.0
416 Name == "cvtdq2pd" || // Added in 3.9
417 Name == "cvtdq2ps" || // Added in 7.0
418 Name == "cvtps2pd" || // Added in 3.9
419 Name == "cvtsi2sd" || // Added in 7.0
420 Name == "cvtsi642sd" || // Added in 7.0
421 Name == "cvtss2sd" || // Added in 7.0
422 Name == "div.sd" || // Added in 4.0
423 Name == "mul.sd" || // Added in 4.0
424 Name.starts_with("padds.") || // Added in 8.0
425 Name.starts_with("paddus.") || // Added in 8.0
426 Name.starts_with("pcmpeq.") || // Added in 3.1
427 Name.starts_with("pcmpgt.") || // Added in 3.1
428 Name == "pmaxs.w" || // Added in 3.9
429 Name == "pmaxu.b" || // Added in 3.9
430 Name == "pmins.w" || // Added in 3.9
431 Name == "pminu.b" || // Added in 3.9
432 Name == "pmulu.dq" || // Added in 7.0
433 Name.starts_with("pshuf") || // Added in 3.9
434 Name.starts_with("psll.dq") || // Added in 3.7
435 Name.starts_with("psrl.dq") || // Added in 3.7
436 Name.starts_with("psubs.") || // Added in 8.0
437 Name.starts_with("psubus.") || // Added in 8.0
438 Name.starts_with("sqrt.p") || // Added in 7.0
439 Name == "sqrt.sd" || // Added in 7.0
440 Name == "storel.dq" || // Added in 3.9
441 Name.starts_with("storeu.") || // Added in 3.9
442 Name == "sub.sd"); // Added in 4.0
443
444 if (Name.consume_front("sse41."))
445 return (Name.starts_with("blendp") || // Added in 3.7
446 Name == "movntdqa" || // Added in 5.0
447 Name == "pblendw" || // Added in 3.7
448 Name == "pmaxsb" || // Added in 3.9
449 Name == "pmaxsd" || // Added in 3.9
450 Name == "pmaxud" || // Added in 3.9
451 Name == "pmaxuw" || // Added in 3.9
452 Name == "pminsb" || // Added in 3.9
453 Name == "pminsd" || // Added in 3.9
454 Name == "pminud" || // Added in 3.9
455 Name == "pminuw" || // Added in 3.9
456 Name.starts_with("pmovsx") || // Added in 3.8
457 Name.starts_with("pmovzx") || // Added in 3.9
458 Name == "pmuldq"); // Added in 7.0
459
460 if (Name.consume_front("sse42."))
461 return Name == "crc32.64.8"; // Added in 3.4
462
463 if (Name.consume_front("sse4a."))
464 return Name.starts_with("movnt."); // Added in 3.9
465
466 if (Name.consume_front("ssse3."))
467 return (Name == "pabs.b.128" || // Added in 6.0
468 Name == "pabs.d.128" || // Added in 6.0
469 Name == "pabs.w.128"); // Added in 6.0
470
471 if (Name.consume_front("xop."))
472 return (Name == "vpcmov" || // Added in 3.8
473 Name == "vpcmov.256" || // Added in 5.0
474 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
475 Name.starts_with("vprot")); // Added in 8.0
476
477 return (Name == "addcarry.u32" || // Added in 8.0
478 Name == "addcarry.u64" || // Added in 8.0
479 Name == "addcarryx.u32" || // Added in 8.0
480 Name == "addcarryx.u64" || // Added in 8.0
481 Name == "subborrow.u32" || // Added in 8.0
482 Name == "subborrow.u64" || // Added in 8.0
483 Name.starts_with("vcvtph2ps.")); // Added in 11.0
484}
485
487 Function *&NewFn) {
488 // Only handle intrinsics that start with "x86.".
489 if (!Name.consume_front("x86."))
490 return false;
491
493 NewFn = nullptr;
494 return true;
495 }
496
497 if (Name == "rdtscp") { // Added in 8.0
498 // If this intrinsic has 0 operands, it's the new version.
499 if (F->getFunctionType()->getNumParams() == 0)
500 return false;
501
502 rename(F);
503 NewFn = Intrinsic::getDeclaration(F->getParent(),
504 Intrinsic::x86_rdtscp);
505 return true;
506 }
507
509
510 // SSE4.1 ptest functions may have an old signature.
511 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
513 .Case("c", Intrinsic::x86_sse41_ptestc)
514 .Case("z", Intrinsic::x86_sse41_ptestz)
515 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
518 return upgradePTESTIntrinsic(F, ID, NewFn);
519
520 return false;
521 }
522
523 // Several blend and other instructions with masks used the wrong number of
524 // bits.
525
526 // Added in 3.6
528 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
529 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
530 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
531 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
532 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
533 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
536 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
537
538 if (Name.consume_front("avx512.mask.cmp.")) {
539 // Added in 7.0
541 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
542 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
543 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
544 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
545 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
546 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
549 return upgradeX86MaskedFPCompare(F, ID, NewFn);
550 return false; // No other 'x86.avx523.mask.cmp.*'.
551 }
552
553 if (Name.consume_front("avx512bf16.")) {
554 // Added in 9.0
556 .Case("cvtne2ps2bf16.128",
557 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
558 .Case("cvtne2ps2bf16.256",
559 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
560 .Case("cvtne2ps2bf16.512",
561 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
562 .Case("mask.cvtneps2bf16.128",
563 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
564 .Case("cvtneps2bf16.256",
565 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
566 .Case("cvtneps2bf16.512",
567 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
570 return upgradeX86BF16Intrinsic(F, ID, NewFn);
571
572 // Added in 9.0
574 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
575 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
576 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
579 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
580 return false; // No other 'x86.avx512bf16.*'.
581 }
582
583 if (Name.consume_front("xop.")) {
585 if (Name.starts_with("vpermil2")) { // Added in 3.9
586 // Upgrade any XOP PERMIL2 index operand still using a float/double
587 // vector.
588 auto Idx = F->getFunctionType()->getParamType(2);
589 if (Idx->isFPOrFPVectorTy()) {
590 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
591 unsigned EltSize = Idx->getScalarSizeInBits();
592 if (EltSize == 64 && IdxSize == 128)
593 ID = Intrinsic::x86_xop_vpermil2pd;
594 else if (EltSize == 32 && IdxSize == 128)
595 ID = Intrinsic::x86_xop_vpermil2ps;
596 else if (EltSize == 64 && IdxSize == 256)
597 ID = Intrinsic::x86_xop_vpermil2pd_256;
598 else
599 ID = Intrinsic::x86_xop_vpermil2ps_256;
600 }
601 } else if (F->arg_size() == 2)
602 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
604 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
605 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
607
609 rename(F);
610 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
611 return true;
612 }
613 return false; // No other 'x86.xop.*'
614 }
615
616 if (Name == "seh.recoverfp") {
617 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
618 return true;
619 }
620
621 return false;
622}
623
624// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
625// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
628 Function *&NewFn) {
629 if (Name.starts_with("rbit")) {
630 // '(arm|aarch64).rbit'.
631 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
632 F->arg_begin()->getType());
633 return true;
634 }
635
636 if (Name == "thread.pointer") {
637 // '(arm|aarch64).thread.pointer'.
638 NewFn =
639 Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
640 return true;
641 }
642
643 bool Neon = Name.consume_front("neon.");
644 if (Neon) {
645 // '(arm|aarch64).neon.*'.
646 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
647 // v16i8 respectively.
648 if (Name.consume_front("bfdot.")) {
649 // (arm|aarch64).neon.bfdot.*'.
652 .Cases("v2f32.v8i8", "v4f32.v16i8",
653 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
654 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
657 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
658 assert((OperandWidth == 64 || OperandWidth == 128) &&
659 "Unexpected operand width");
660 LLVMContext &Ctx = F->getParent()->getContext();
661 std::array<Type *, 2> Tys{
662 {F->getReturnType(),
663 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
664 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
665 return true;
666 }
667 return false; // No other '(arm|aarch64).neon.bfdot.*'.
668 }
669
670 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
671 // anymore and accept v8bf16 instead of v16i8.
672 if (Name.consume_front("bfm")) {
673 // (arm|aarch64).neon.bfm*'.
674 if (Name.consume_back(".v4f32.v16i8")) {
675 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
678 .Case("mla",
679 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
680 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
681 .Case("lalb",
682 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
683 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
684 .Case("lalt",
685 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
686 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
689 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
690 return true;
691 }
692 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
693 }
694 return false; // No other '(arm|aarch64).neon.bfm*.
695 }
696 // Continue on to Aarch64 Neon or Arm Neon.
697 }
698 // Continue on to Arm or Aarch64.
699
700 if (IsArm) {
701 // 'arm.*'.
702 if (Neon) {
703 // 'arm.neon.*'.
705 .StartsWith("vclz.", Intrinsic::ctlz)
706 .StartsWith("vcnt.", Intrinsic::ctpop)
707 .StartsWith("vqadds.", Intrinsic::sadd_sat)
708 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
709 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
710 .StartsWith("vqsubu.", Intrinsic::usub_sat)
713 NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
714 F->arg_begin()->getType());
715 return true;
716 }
717
718 if (Name.consume_front("vst")) {
719 // 'arm.neon.vst*'.
720 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
722 if (vstRegex.match(Name, &Groups)) {
723 static const Intrinsic::ID StoreInts[] = {
724 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
725 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
726
727 static const Intrinsic::ID StoreLaneInts[] = {
728 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
729 Intrinsic::arm_neon_vst4lane};
730
731 auto fArgs = F->getFunctionType()->params();
732 Type *Tys[] = {fArgs[0], fArgs[1]};
733 if (Groups[1].size() == 1)
734 NewFn = Intrinsic::getDeclaration(F->getParent(),
735 StoreInts[fArgs.size() - 3], Tys);
736 else
738 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
739 return true;
740 }
741 return false; // No other 'arm.neon.vst*'.
742 }
743
744 return false; // No other 'arm.neon.*'.
745 }
746
747 if (Name.consume_front("mve.")) {
748 // 'arm.mve.*'.
749 if (Name == "vctp64") {
750 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
751 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
752 // the function and deal with it below in UpgradeIntrinsicCall.
753 rename(F);
754 return true;
755 }
756 return false; // Not 'arm.mve.vctp64'.
757 }
758
759 // These too are changed to accept a v2i1 instead of the old v4i1.
760 if (Name.consume_back(".v4i1")) {
761 // 'arm.mve.*.v4i1'.
762 if (Name.consume_back(".predicated.v2i64.v4i32"))
763 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
764 return Name == "mull.int" || Name == "vqdmull";
765
766 if (Name.consume_back(".v2i64")) {
767 // 'arm.mve.*.v2i64.v4i1'
768 bool IsGather = Name.consume_front("vldr.gather.");
769 if (IsGather || Name.consume_front("vstr.scatter.")) {
770 if (Name.consume_front("base.")) {
771 // Optional 'wb.' prefix.
772 Name.consume_front("wb.");
773 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
774 // predicated.v2i64.v2i64.v4i1'.
775 return Name == "predicated.v2i64";
776 }
777
778 if (Name.consume_front("offset.predicated."))
779 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
780 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
781
782 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
783 return false;
784 }
785
786 return false; // No other 'arm.mve.*.v2i64.v4i1'.
787 }
788 return false; // No other 'arm.mve.*.v4i1'.
789 }
790 return false; // No other 'arm.mve.*'.
791 }
792
793 if (Name.consume_front("cde.vcx")) {
794 // 'arm.cde.vcx*'.
795 if (Name.consume_back(".predicated.v2i64.v4i1"))
796 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
797 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
798 Name == "3q" || Name == "3qa";
799
800 return false; // No other 'arm.cde.vcx*'.
801 }
802 } else {
803 // 'aarch64.*'.
804 if (Neon) {
805 // 'aarch64.neon.*'.
807 .StartsWith("frintn", Intrinsic::roundeven)
808 .StartsWith("rbit", Intrinsic::bitreverse)
811 NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
812 F->arg_begin()->getType());
813 return true;
814 }
815
816 if (Name.starts_with("addp")) {
817 // 'aarch64.neon.addp*'.
818 if (F->arg_size() != 2)
819 return false; // Invalid IR.
820 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
821 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
822 NewFn = Intrinsic::getDeclaration(F->getParent(),
823 Intrinsic::aarch64_neon_faddp, Ty);
824 return true;
825 }
826 }
827 return false; // No other 'aarch64.neon.*'.
828 }
829 if (Name.consume_front("sve.")) {
830 // 'aarch64.sve.*'.
831 if (Name.consume_front("bf")) {
832 if (Name.consume_back(".lane")) {
833 // 'aarch64.sve.bf*.lane'.
836 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
837 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
838 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
841 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
842 return true;
843 }
844 return false; // No other 'aarch64.sve.bf*.lane'.
845 }
846 return false; // No other 'aarch64.sve.bf*'.
847 }
848
849 if (Name.consume_front("addqv")) {
850 // 'aarch64.sve.addqv'.
851 if (!F->getReturnType()->isFPOrFPVectorTy())
852 return false;
853
854 auto Args = F->getFunctionType()->params();
855 Type *Tys[] = {F->getReturnType(), Args[1]};
856 NewFn = Intrinsic::getDeclaration(F->getParent(),
857 Intrinsic::aarch64_sve_faddqv, Tys);
858 return true;
859 }
860
861 if (Name.consume_front("ld")) {
862 // 'aarch64.sve.ld*'.
863 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
864 if (LdRegex.match(Name)) {
865 Type *ScalarTy =
866 cast<VectorType>(F->getReturnType())->getElementType();
867 ElementCount EC =
868 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
869 Type *Ty = VectorType::get(ScalarTy, EC);
870 static const Intrinsic::ID LoadIDs[] = {
871 Intrinsic::aarch64_sve_ld2_sret,
872 Intrinsic::aarch64_sve_ld3_sret,
873 Intrinsic::aarch64_sve_ld4_sret,
874 };
875 NewFn = Intrinsic::getDeclaration(F->getParent(),
876 LoadIDs[Name[0] - '2'], Ty);
877 return true;
878 }
879 return false; // No other 'aarch64.sve.ld*'.
880 }
881
882 if (Name.consume_front("tuple.")) {
883 // 'aarch64.sve.tuple.*'.
884 if (Name.starts_with("get")) {
885 // 'aarch64.sve.tuple.get*'.
886 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
887 NewFn = Intrinsic::getDeclaration(F->getParent(),
888 Intrinsic::vector_extract, Tys);
889 return true;
890 }
891
892 if (Name.starts_with("set")) {
893 // 'aarch64.sve.tuple.set*'.
894 auto Args = F->getFunctionType()->params();
895 Type *Tys[] = {Args[0], Args[2], Args[1]};
896 NewFn = Intrinsic::getDeclaration(F->getParent(),
897 Intrinsic::vector_insert, Tys);
898 return true;
899 }
900
901 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
902 if (CreateTupleRegex.match(Name)) {
903 // 'aarch64.sve.tuple.create*'.
904 auto Args = F->getFunctionType()->params();
905 Type *Tys[] = {F->getReturnType(), Args[1]};
906 NewFn = Intrinsic::getDeclaration(F->getParent(),
907 Intrinsic::vector_insert, Tys);
908 return true;
909 }
910 return false; // No other 'aarch64.sve.tuple.*'.
911 }
912 return false; // No other 'aarch64.sve.*'.
913 }
914 }
915 return false; // No other 'arm.*', 'aarch64.*'.
916}
917
919 if (Name.consume_front("abs."))
921 .Case("bf16", Intrinsic::nvvm_abs_bf16)
922 .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
924
925 if (Name.consume_front("fma.rn."))
927 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
928 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
929 .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
930 .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
931 .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
932 .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
933 .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
934 .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
935 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
936 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
937 .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
938 .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
940
941 if (Name.consume_front("fmax."))
943 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
944 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
945 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
946 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
947 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
948 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
949 .Case("ftz.nan.xorsign.abs.bf16",
950 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
951 .Case("ftz.nan.xorsign.abs.bf16x2",
952 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
953 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
954 .Case("ftz.xorsign.abs.bf16x2",
955 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
956 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
957 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
958 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
959 .Case("nan.xorsign.abs.bf16x2",
960 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
961 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
962 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
964
965 if (Name.consume_front("fmin."))
967 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
968 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
969 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
970 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
971 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
972 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
973 .Case("ftz.nan.xorsign.abs.bf16",
974 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
975 .Case("ftz.nan.xorsign.abs.bf16x2",
976 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
977 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
978 .Case("ftz.xorsign.abs.bf16x2",
979 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
980 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
981 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
982 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
983 .Case("nan.xorsign.abs.bf16x2",
984 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
985 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
986 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
988
989 if (Name.consume_front("neg."))
991 .Case("bf16", Intrinsic::nvvm_neg_bf16)
992 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
994
996}
997
999 bool CanUpgradeDebugIntrinsicsToRecords) {
1000 assert(F && "Illegal to upgrade a non-existent Function.");
1001
1002 StringRef Name = F->getName();
1003
1004 // Quickly eliminate it, if it's not a candidate.
1005 if (!Name.consume_front("llvm.") || Name.empty())
1006 return false;
1007
1008 switch (Name[0]) {
1009 default: break;
1010 case 'a': {
1011 bool IsArm = Name.consume_front("arm.");
1012 if (IsArm || Name.consume_front("aarch64.")) {
1013 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1014 return true;
1015 break;
1016 }
1017
1018 if (Name.consume_front("amdgcn.")) {
1019 if (Name == "alignbit") {
1020 // Target specific intrinsic became redundant
1021 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
1022 {F->getReturnType()});
1023 return true;
1024 }
1025
1026 if (Name.consume_front("atomic.")) {
1027 if (Name.starts_with("inc") || Name.starts_with("dec")) {
1028 // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
1029 // there's no new declaration.
1030 NewFn = nullptr;
1031 return true;
1032 }
1033 break; // No other 'amdgcn.atomic.*'
1034 }
1035
1036 if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||
1037 Name.starts_with("ds.fmax")) {
1038 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1039 // declaration.
1040 NewFn = nullptr;
1041 return true;
1042 }
1043
1044 if (Name.starts_with("ldexp.")) {
1045 // Target specific intrinsic became redundant
1047 F->getParent(), Intrinsic::ldexp,
1048 {F->getReturnType(), F->getArg(1)->getType()});
1049 return true;
1050 }
1051 break; // No other 'amdgcn.*'
1052 }
1053
1054 break;
1055 }
1056 case 'c': {
1057 if (F->arg_size() == 1) {
1059 .StartsWith("ctlz.", Intrinsic::ctlz)
1060 .StartsWith("cttz.", Intrinsic::cttz)
1063 rename(F);
1064 NewFn = Intrinsic::getDeclaration(F->getParent(), ID,
1065 F->arg_begin()->getType());
1066 return true;
1067 }
1068 }
1069
1070 if (F->arg_size() == 2 && Name == "coro.end") {
1071 rename(F);
1072 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);
1073 return true;
1074 }
1075
1076 break;
1077 }
1078 case 'd':
1079 if (Name.consume_front("dbg.")) {
1080 // Mark debug intrinsics for upgrade to new debug format.
1081 if (CanUpgradeDebugIntrinsicsToRecords &&
1082 F->getParent()->IsNewDbgInfoFormat) {
1083 if (Name == "addr" || Name == "value" || Name == "assign" ||
1084 Name == "declare" || Name == "label") {
1085 // There's no function to replace these with.
1086 NewFn = nullptr;
1087 // But we do want these to get upgraded.
1088 return true;
1089 }
1090 }
1091 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1092 // converted to DbgVariableRecords later.
1093 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1094 rename(F);
1095 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
1096 return true;
1097 }
1098 break; // No other 'dbg.*'.
1099 }
1100 break;
1101 case 'e':
1102 if (Name.consume_front("experimental.vector.")) {
1105 .StartsWith("extract.", Intrinsic::vector_extract)
1106 .StartsWith("insert.", Intrinsic::vector_insert)
1107 .StartsWith("splice.", Intrinsic::vector_splice)
1108 .StartsWith("reverse.", Intrinsic::vector_reverse)
1109 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1110 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1113 const auto *FT = F->getFunctionType();
1115 if (ID == Intrinsic::vector_extract ||
1116 ID == Intrinsic::vector_interleave2)
1117 // Extracting overloads the return type.
1118 Tys.push_back(FT->getReturnType());
1119 if (ID != Intrinsic::vector_interleave2)
1120 Tys.push_back(FT->getParamType(0));
1121 if (ID == Intrinsic::vector_insert)
1122 // Inserting overloads the inserted type.
1123 Tys.push_back(FT->getParamType(1));
1124 rename(F);
1125 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
1126 return true;
1127 }
1128
1129 if (Name.consume_front("reduce.")) {
1131 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1132 if (R.match(Name, &Groups))
1134 .Case("add", Intrinsic::vector_reduce_add)
1135 .Case("mul", Intrinsic::vector_reduce_mul)
1136 .Case("and", Intrinsic::vector_reduce_and)
1137 .Case("or", Intrinsic::vector_reduce_or)
1138 .Case("xor", Intrinsic::vector_reduce_xor)
1139 .Case("smax", Intrinsic::vector_reduce_smax)
1140 .Case("smin", Intrinsic::vector_reduce_smin)
1141 .Case("umax", Intrinsic::vector_reduce_umax)
1142 .Case("umin", Intrinsic::vector_reduce_umin)
1143 .Case("fmax", Intrinsic::vector_reduce_fmax)
1144 .Case("fmin", Intrinsic::vector_reduce_fmin)
1146
1147 bool V2 = false;
1149 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1150 Groups.clear();
1151 V2 = true;
1152 if (R2.match(Name, &Groups))
1154 .Case("fadd", Intrinsic::vector_reduce_fadd)
1155 .Case("fmul", Intrinsic::vector_reduce_fmul)
1157 }
1159 rename(F);
1160 auto Args = F->getFunctionType()->params();
1161 NewFn =
1162 Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 1 : 0]});
1163 return true;
1164 }
1165 break; // No other 'expermental.vector.reduce.*'.
1166 }
1167 break; // No other 'experimental.vector.*'.
1168 }
1169 break; // No other 'e*'.
1170 case 'f':
1171 if (Name.starts_with("flt.rounds")) {
1172 rename(F);
1173 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);
1174 return true;
1175 }
1176 break;
1177 case 'i':
1178 if (Name.starts_with("invariant.group.barrier")) {
1179 // Rename invariant.group.barrier to launder.invariant.group
1180 auto Args = F->getFunctionType()->params();
1181 Type* ObjectPtr[1] = {Args[0]};
1182 rename(F);
1183 NewFn = Intrinsic::getDeclaration(F->getParent(),
1184 Intrinsic::launder_invariant_group, ObjectPtr);
1185 return true;
1186 }
1187 break;
1188 case 'm': {
1189 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1190 // alignment parameter to embedding the alignment as an attribute of
1191 // the pointer args.
1192 if (unsigned ID = StringSwitch<unsigned>(Name)
1193 .StartsWith("memcpy.", Intrinsic::memcpy)
1194 .StartsWith("memmove.", Intrinsic::memmove)
1195 .Default(0)) {
1196 if (F->arg_size() == 5) {
1197 rename(F);
1198 // Get the types of dest, src, and len
1199 ArrayRef<Type *> ParamTypes =
1200 F->getFunctionType()->params().slice(0, 3);
1201 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);
1202 return true;
1203 }
1204 }
1205 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1206 rename(F);
1207 // Get the types of dest, and len
1208 const auto *FT = F->getFunctionType();
1209 Type *ParamTypes[2] = {
1210 FT->getParamType(0), // Dest
1211 FT->getParamType(2) // len
1212 };
1213 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
1214 ParamTypes);
1215 return true;
1216 }
1217 break;
1218 }
1219 case 'n': {
1220 if (Name.consume_front("nvvm.")) {
1221 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1222 if (F->arg_size() == 1) {
1223 Intrinsic::ID IID =
1225 .Cases("brev32", "brev64", Intrinsic::bitreverse)
1226 .Case("clz.i", Intrinsic::ctlz)
1227 .Case("popc.i", Intrinsic::ctpop)
1229 if (IID != Intrinsic::not_intrinsic) {
1230 NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
1231 {F->getReturnType()});
1232 return true;
1233 }
1234 }
1235
1236 // Check for nvvm intrinsics that need a return type adjustment.
1237 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1239 if (IID != Intrinsic::not_intrinsic) {
1240 NewFn = nullptr;
1241 return true;
1242 }
1243 }
1244
1245 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1246 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1247 //
1248 // TODO: We could add lohi.i2d.
1249 bool Expand = false;
1250 if (Name.consume_front("abs."))
1251 // nvvm.abs.{i,ll}
1252 Expand = Name == "i" || Name == "ll";
1253 else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
1254 Expand = true;
1255 else if (Name.consume_front("max.") || Name.consume_front("min."))
1256 // nvvm.{min,max}.{s,i,ll,us,ui,ull}
1257 Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
1258 Name == "ui" || Name == "ull";
1259 else if (Name.consume_front("atomic.load.add."))
1260 // nvvm.atomic.load.add.{f32.p,f64.p}
1261 Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
1262 else
1263 Expand = false;
1264
1265 if (Expand) {
1266 NewFn = nullptr;
1267 return true;
1268 }
1269 break; // No other 'nvvm.*'.
1270 }
1271 break;
1272 }
1273 case 'o':
1274 // We only need to change the name to match the mangling including the
1275 // address space.
1276 if (Name.starts_with("objectsize.")) {
1277 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1278 if (F->arg_size() == 2 || F->arg_size() == 3 ||
1279 F->getName() !=
1280 Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
1281 rename(F);
1282 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
1283 Tys);
1284 return true;
1285 }
1286 }
1287 break;
1288
1289 case 'p':
1290 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1291 rename(F);
1293 F->getParent(), Intrinsic::ptr_annotation,
1294 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1295 return true;
1296 }
1297 break;
1298
1299 case 'r': {
1300 if (Name.consume_front("riscv.")) {
1303 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1304 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1305 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1306 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1309 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1310 rename(F);
1311 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1312 return true;
1313 }
1314 break; // No other applicable upgrades.
1315 }
1316
1318 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1319 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1322 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1323 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1324 rename(F);
1325 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1326 return true;
1327 }
1328 break; // No other applicable upgrades.
1329 }
1330
1332 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1333 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1334 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1335 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1336 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1337 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1340 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1341 rename(F);
1342 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1343 return true;
1344 }
1345 break; // No other applicable upgrades.
1346 }
1347 break; // No other 'riscv.*' intrinsics
1348 }
1349 } break;
1350
1351 case 's':
1352 if (Name == "stackprotectorcheck") {
1353 NewFn = nullptr;
1354 return true;
1355 }
1356 break;
1357
1358 case 'v': {
1359 if (Name == "var.annotation" && F->arg_size() == 4) {
1360 rename(F);
1362 F->getParent(), Intrinsic::var_annotation,
1363 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1364 return true;
1365 }
1366 break;
1367 }
1368
1369 case 'w':
1370 if (Name.consume_front("wasm.")) {
1373 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1374 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1375 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1378 rename(F);
1379 NewFn =
1380 Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());
1381 return true;
1382 }
1383
1384 if (Name.consume_front("dot.i8x16.i7x16.")) {
1386 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1387 .Case("add.signed",
1388 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1391 rename(F);
1392 NewFn = Intrinsic::getDeclaration(F->getParent(), ID);
1393 return true;
1394 }
1395 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1396 }
1397 break; // No other 'wasm.*'.
1398 }
1399 break;
1400
1401 case 'x':
1402 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1403 return true;
1404 }
1405
1406 auto *ST = dyn_cast<StructType>(F->getReturnType());
1407 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1408 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1409 // Replace return type with literal non-packed struct. Only do this for
1410 // intrinsics declared to return a struct, not for intrinsics with
1411 // overloaded return type, in which case the exact struct type will be
1412 // mangled into the name.
1415 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1416 auto *FT = F->getFunctionType();
1417 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1418 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1419 std::string Name = F->getName().str();
1420 rename(F);
1421 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1422 Name, F->getParent());
1423
1424 // The new function may also need remangling.
1425 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1426 NewFn = *Result;
1427 return true;
1428 }
1429 }
1430
1431 // Remangle our intrinsic since we upgrade the mangling
1433 if (Result != std::nullopt) {
1434 NewFn = *Result;
1435 return true;
1436 }
1437
1438 // This may not belong here. This function is effectively being overloaded
1439 // to both detect an intrinsic which needs upgrading, and to provide the
1440 // upgraded form of the intrinsic. We should perhaps have two separate
1441 // functions for this.
1442 return false;
1443}
1444
// Wrapper around upgradeIntrinsicFunction1: asks it whether F needs an
// upgrade (and for the replacement declaration, if any), then refreshes the
// intrinsic attributes. Returns the helper's verdict unchanged.
// NOTE(review): the opening line of this signature (return type, name and
// leading parameters) is absent from this listing; only the last parameter
// survives below.
1446 bool CanUpgradeDebugIntrinsicsToRecords) {
// Start from "no replacement"; the helper assigns NewFn only on the paths
// that produce a new declaration.
1447 NewFn = nullptr;
1448 bool Upgraded =
1449 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1450 assert(F != NewFn && "Intrinsic function upgraded to the same function");
1451
1452 // Upgrade intrinsic attributes. This does not change the function.
// When a replacement exists its attributes are the ones refreshed; otherwise
// F's own. Nothing is set if the function has no intrinsic ID.
1453 if (NewFn)
1454 F = NewFn;
1455 if (Intrinsic::ID id = F->getIntrinsicID())
1456 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
1457 return Upgraded;
1458}
1459
1461 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1462 GV->getName() == "llvm.global_dtors")) ||
1463 !GV->hasInitializer())
1464 return nullptr;
1465 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1466 if (!ATy)
1467 return nullptr;
1468 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1469 if (!STy || STy->getNumElements() != 2)
1470 return nullptr;
1471
1472 LLVMContext &C = GV->getContext();
1473 IRBuilder<> IRB(C);
1474 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1475 IRB.getPtrTy());
1476 Constant *Init = GV->getInitializer();
1477 unsigned N = Init->getNumOperands();
1478 std::vector<Constant *> NewCtors(N);
1479 for (unsigned i = 0; i != N; ++i) {
1480 auto Ctor = cast<Constant>(Init->getOperand(i));
1481 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1482 Ctor->getAggregateElement(1),
1484 }
1485 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1486
1487 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1488 NewInit, GV->getName());
1489}
1490
1491// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1492// to byte shuffles.
// Op is the vector being shifted and Shift the byte count applied inside each
// 16-byte lane. The result has the same type as Op.
// NOTE(review): the opening line of this signature is absent from this
// listing; only the trailing parameter survives below.
1494 unsigned Shift) {
1495 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// NumElts counts bytes, not result elements: 8 bytes per 64-bit element.
1496 unsigned NumElts = ResultTy->getNumElements() * 8;
1497
1498 // Bitcast from a 64-bit element type to a byte element type.
1499 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1500 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1501
1502 // We'll be shuffling in zeroes.
1503 Value *Res = Constant::getNullValue(VecTy);
1504
1505 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1506 // we'll just return the zero vector.
1507 if (Shift < 16) {
// Sized for the largest case handled here (64 bytes).
1508 int Idxs[64];
1509 // 256/512-bit version is split into 2/4 16-byte lanes.
1510 for (unsigned l = 0; l != NumElts; l += 16)
1511 for (unsigned i = 0; i != 16; ++i) {
// Shuffle operands are (zeroes, Op): an index >= NumElts reads a byte of Op,
// an index < NumElts reads a zero. Result byte i of a lane therefore takes
// Op byte (i - Shift) of that lane, or zero when i < Shift.
1512 unsigned Idx = NumElts + i - Shift;
1513 if (Idx < NumElts)
1514 Idx -= NumElts - 16; // end of lane, switch operand.
1515 Idxs[l + i] = Idx + l;
1516 }
1517
1518 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1519 }
1520
1521 // Bitcast back to a 64-bit element type.
1522 return Builder.CreateBitCast(Res, ResultTy, "cast");
1523}
1524
1525// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1526// to byte shuffles.
// Op is the vector being shifted and Shift the byte count applied inside each
// 16-byte lane. The result has the same type as Op.
// NOTE(review): the opening line of this signature is absent from this
// listing; only the trailing parameter survives below.
1528 unsigned Shift) {
1529 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// NumElts counts bytes, not result elements: 8 bytes per 64-bit element.
1530 unsigned NumElts = ResultTy->getNumElements() * 8;
1531
1532 // Bitcast from a 64-bit element type to a byte element type.
1533 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1534 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1535
1536 // We'll be shuffling in zeroes.
1537 Value *Res = Constant::getNullValue(VecTy);
1538
1539 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1540 // we'll just return the zero vector.
1541 if (Shift < 16) {
// Sized for the largest case handled here (64 bytes).
1542 int Idxs[64];
1543 // 256/512-bit version is split into 2/4 16-byte lanes.
1544 for (unsigned l = 0; l != NumElts; l += 16)
1545 for (unsigned i = 0; i != 16; ++i) {
// Shuffle operands are (Op, zeroes): an index < NumElts reads a byte of Op,
// an index >= NumElts reads a zero. Result byte i of a lane therefore takes
// Op byte (i + Shift) of that lane, or zero once i + Shift leaves the lane.
1546 unsigned Idx = i + Shift;
1547 if (Idx >= 16)
1548 Idx += NumElts - 16; // end of lane, switch operand.
1549 Idxs[l + i] = Idx + l;
1550 }
1551
1552 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1553 }
1554
1555 // Bitcast back to a 64-bit element type.
1556 return Builder.CreateBitCast(Res, ResultTy, "cast");
1557}
1558
1559static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1560 unsigned NumElts) {
1561 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1563 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1564 Mask = Builder.CreateBitCast(Mask, MaskTy);
1565
1566 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1567 // i8 and we need to extract down to the right number of elements.
1568 if (NumElts <= 4) {
1569 int Indices[4];
1570 for (unsigned i = 0; i != NumElts; ++i)
1571 Indices[i] = i;
1572 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1573 "extract");
1574 }
1575
1576 return Mask;
1577}
1578
1579static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1580 Value *Op1) {
1581 // If the mask is all ones just emit the first operation.
1582 if (const auto *C = dyn_cast<Constant>(Mask))
1583 if (C->isAllOnesValue())
1584 return Op0;
1585
1586 Mask = getX86MaskVec(Builder, Mask,
1587 cast<FixedVectorType>(Op0->getType())->getNumElements());
1588 return Builder.CreateSelect(Mask, Op0, Op1);
1589}
1590
1591static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1592 Value *Op1) {
1593 // If the mask is all ones just emit the first operation.
1594 if (const auto *C = dyn_cast<Constant>(Mask))
1595 if (C->isAllOnesValue())
1596 return Op0;
1597
1598 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1599 Mask->getType()->getIntegerBitWidth());
1600 Mask = Builder.CreateBitCast(Mask, MaskTy);
1601 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1602 return Builder.CreateSelect(Mask, Op0, Op1);
1603}
1604
1605// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
1606// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
1607// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
1609 Value *Op1, Value *Shift,
1610 Value *Passthru, Value *Mask,
1611 bool IsVALIGN) {
1612 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
1613
1614 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1615 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
1616 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
1617 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
1618
1619 // Mask the immediate for VALIGN.
1620 if (IsVALIGN)
1621 ShiftVal &= (NumElts - 1);
1622
1623 // If palignr is shifting the pair of vectors more than the size of two
1624 // lanes, emit zero.
1625 if (ShiftVal >= 32)
1627
1628 // If palignr is shifting the pair of input vectors more than one lane,
1629 // but less than two lanes, convert to shifting in zeroes.
1630 if (ShiftVal > 16) {
1631 ShiftVal -= 16;
1632 Op1 = Op0;
1634 }
1635
1636 int Indices[64];
1637 // 256-bit palignr operates on 128-bit lanes so we need to handle that
1638 for (unsigned l = 0; l < NumElts; l += 16) {
1639 for (unsigned i = 0; i != 16; ++i) {
1640 unsigned Idx = ShiftVal + i;
1641 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
1642 Idx += NumElts - 16; // End of lane, switch operand.
1643 Indices[l + i] = Idx + l;
1644 }
1645 }
1646
1647 Value *Align = Builder.CreateShuffleVector(
1648 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
1649
1650 return emitX86Select(Builder, Mask, Align, Passthru);
1651}
1652
// Re-express a masked two-source permute call as a call to the matching
// x86_avx512_vpermi2var_* intrinsic followed by a mask select.
// ZeroMask picks a zero passthru instead of operand 1; IndexForm says whether
// the call's operands are already in the order the new intrinsic expects.
// NOTE(review): the opening line of this signature is absent from this
// listing; Builder and CI are used below as the IR builder and the call
// being upgraded.
1654 bool ZeroMask, bool IndexForm) {
1655 Type *Ty = CI.getType();
1656 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1657 unsigned EltWidth = Ty->getScalarSizeInBits();
1658 bool IsFloat = Ty->isFPOrFPVectorTy();
// Pick the intrinsic from the result's total width, element width and, for
// 32/64-bit elements, whether the elements are floating point.
1659 Intrinsic::ID IID;
1660 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1661 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1662 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1663 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1664 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1665 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1666 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1667 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1668 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1669 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1670 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1671 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1672 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1673 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1674 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1675 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1676 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1677 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1678 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1679 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1680 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1681 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1682 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1683 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
// 16- and 8-bit element variants are chosen on widths alone.
1684 else if (VecWidth == 128 && EltWidth == 16)
1685 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1686 else if (VecWidth == 256 && EltWidth == 16)
1687 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1688 else if (VecWidth == 512 && EltWidth == 16)
1689 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1690 else if (VecWidth == 128 && EltWidth == 8)
1691 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1692 else if (VecWidth == 256 && EltWidth == 8)
1693 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1694 else if (VecWidth == 512 && EltWidth == 8)
1695 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1696 else
1697 llvm_unreachable("Unexpected intrinsic");
1698
1699 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1700 CI.getArgOperand(2) };
1701
1702 // If this isn't index form we need to swap operand 0 and 1.
1703 if (!IndexForm)
1704 std::swap(Args[0], Args[1]);
1705
1706 Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1707 Args);
// The passthru is taken from the call's original operand 1 (not the possibly
// swapped Args), bitcast to the result type, unless zero-masking was asked for.
1708 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1709 : Builder.CreateBitCast(CI.getArgOperand(1),
1710 Ty);
// Operand 3 of the original call is the integer lane mask.
1711 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1712}
1713
// Replace a two-operand call with a call to intrinsic IID (overloaded on the
// call's result type), applying the mask select when the call carries one.
// NOTE(review): the opening line of this signature is absent from this
// listing; Builder and CI are used below as the IR builder and the call
// being upgraded.
1715 Intrinsic::ID IID) {
1716 Type *Ty = CI.getType();
1717 Value *Op0 = CI.getOperand(0);
1718 Value *Op1 = CI.getOperand(1);
1719 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1720 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1721
// A four-argument call is the masked form: operand 2 is the passthru vector
// and operand 3 the integer mask.
1722 if (CI.arg_size() == 4) { // For masked intrinsics.
1723 Value *VecSrc = CI.getOperand(2);
1724 Value *Mask = CI.getOperand(3);
1725 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1726 }
1727 return Res;
1728}
1729
// Replace a rotate call with llvm.fshl / llvm.fshr applied to the same value
// for both data operands, applying the mask select when the call carries one.
// IsRotateRight selects fshr over fshl.
// NOTE(review): the opening line of this signature is absent from this
// listing; Builder and CI are used below as the IR builder and the call
// being upgraded.
1731 bool IsRotateRight) {
1732 Type *Ty = CI.getType();
1733 Value *Src = CI.getArgOperand(0);
1734 Value *Amt = CI.getArgOperand(1);
1735
1736 // Amount may be scalar immediate, in which case create a splat vector.
1737 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1738 // we only care about the lowest log2 bits anyway.
1739 if (Amt->getType() != Ty) {
1740 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// Unsigned resize to the element type (isSigned = false), then broadcast.
1741 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1742 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1743 }
1744
1745 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1746 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
// Passing Src as both funnel-shift inputs yields a rotate of Src.
1747 Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1748
// A four-argument call is the masked form: operand 2 is the passthru vector
// and operand 3 the integer mask.
1749 if (CI.arg_size() == 4) { // For masked intrinsics.
1750 Value *VecSrc = CI.getOperand(2);
1751 Value *Mask = CI.getOperand(3);
1752 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1753 }
1754 return Res;
1755}
1756
1757static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
1758 bool IsSigned) {
1759 Type *Ty = CI.getType();
1760 Value *LHS = CI.getArgOperand(0);
1761 Value *RHS = CI.getArgOperand(1);
1762
1763 CmpInst::Predicate Pred;
1764 switch (Imm) {
1765 case 0x0:
1766 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1767 break;
1768 case 0x1:
1769 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1770 break;
1771 case 0x2:
1772 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1773 break;
1774 case 0x3:
1775 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1776 break;
1777 case 0x4:
1778 Pred = ICmpInst::ICMP_EQ;
1779 break;
1780 case 0x5:
1781 Pred = ICmpInst::ICMP_NE;
1782 break;
1783 case 0x6:
1784 return Constant::getNullValue(Ty); // FALSE
1785 case 0x7:
1786 return Constant::getAllOnesValue(Ty); // TRUE
1787 default:
1788 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1789 }
1790
1791 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1792 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1793 return Ext;
1794}
1795
// Replace a two-source concatenating shift call with llvm.fshl / llvm.fshr,
// applying the mask select when the call carries a mask.
// IsShiftRight selects fshr (with the two data operands swapped); ZeroMask
// selects a zero passthru for the four-argument masked form.
// NOTE(review): the opening line of this signature is absent from this
// listing; Builder and CI are used below as the IR builder and the call
// being upgraded.
1797 bool IsShiftRight, bool ZeroMask) {
1798 Type *Ty = CI.getType();
1799 Value *Op0 = CI.getArgOperand(0);
1800 Value *Op1 = CI.getArgOperand(1);
1801 Value *Amt = CI.getArgOperand(2);
1802
// The right-shift form passes its data operands to fshr in the opposite order.
1803 if (IsShiftRight)
1804 std::swap(Op0, Op1);
1805
1806 // Amount may be scalar immediate, in which case create a splat vector.
1807 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1808 // we only care about the lowest log2 bits anyway.
1809 if (Amt->getType() != Ty) {
1810 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// Unsigned resize to the element type (isSigned = false), then broadcast.
1811 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1812 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1813 }
1814
1815 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1816 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1817 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1818
1819 unsigned NumArgs = CI.arg_size();
1820 if (NumArgs >= 4) { // For masked intrinsics.
// Passthru: explicit operand 3 when there are five arguments; otherwise zero
// if ZeroMask, else the call's original operand 0 (read from CI, so it is not
// affected by the swap above). The mask is always the last argument.
1821 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1822 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1823 CI.getArgOperand(0);
1824 Value *Mask = CI.getOperand(NumArgs - 1);
1825 Res = emitX86Select(Builder, Mask, Res, VecSrc);
1826 }
1827 return Res;
1828}
1829
1831 Value *Mask, bool Aligned) {
 // Stores Data through Ptr: as a plain aligned store when Mask is a constant
 // with all bits set, otherwise as a masked store.
1832 // Cast the pointer to the right type.
1833 Ptr = Builder.CreateBitCast(Ptr,
1834 llvm::PointerType::getUnqual(Data->getType()));
 // Aligned stores use the size of Data's type in bytes as the alignment;
 // unaligned stores use an alignment of 1.
1835 const Align Alignment =
1836 Aligned
1837 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
1838 : Align(1);
1839
1840 // If the mask is all ones just emit a regular store.
1841 if (const auto *C = dyn_cast<Constant>(Mask))
1842 if (C->isAllOnesValue())
1843 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
1844
1845 // Convert the mask from an integer type to a vector of i1.
1846 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
1847 Mask = getX86MaskVec(Builder, Mask, NumElts);
1848 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
1849}
1850
1852 Value *Passthru, Value *Mask, bool Aligned) {
 // Loads a value of Passthru's type from Ptr: as a plain aligned load when
 // Mask is a constant with all bits set, otherwise as a masked load that is
 // given Passthru as its pass-through operand.
1853 Type *ValTy = Passthru->getType();
1854 // Cast the pointer to the right type.
 // Unaligned loads use an alignment of 1.
1856 const Align Alignment =
1857 Aligned
1858 ? Align(
1860 8)
1861 : Align(1);
1862
1863 // If the mask is all ones just emit a regular load.
1864 if (const auto *C = dyn_cast<Constant>(Mask))
1865 if (C->isAllOnesValue())
1866 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
1867
1868 // Convert the mask from an integer type to a vector of i1.
1869 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
1870 Mask = getX86MaskVec(Builder, Mask, NumElts);
1871 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
1872}
1873
1874static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
1875 Type *Ty = CI.getType();
1876 Value *Op0 = CI.getArgOperand(0);
1877 Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
1878 Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
1879 if (CI.arg_size() == 3)
1880 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
1881 return Res;
1882}
1883
1884static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
1885 Type *Ty = CI.getType();
1886
1887 // Arguments have a vXi32 type so cast to vXi64.
1888 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1889 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1890
1891 if (IsSigned) {
1892 // Shift left then arithmetic shift right.
1893 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1894 LHS = Builder.CreateShl(LHS, ShiftAmt);
1895 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1896 RHS = Builder.CreateShl(RHS, ShiftAmt);
1897 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1898 } else {
1899 // Clear the upper bits.
1900 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1901 LHS = Builder.CreateAnd(LHS, Mask);
1902 RHS = Builder.CreateAnd(RHS, Mask);
1903 }
1904
1905 Value *Res = Builder.CreateMul(LHS, RHS);
1906
1907 if (CI.arg_size() == 4)
1908 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1909
1910 return Res;
1911}
1912
1913// Apply a mask to a vector of i1 and widen the result to at least 8 bits.
1915 Value *Mask) {
1916 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
 // A null Mask, or a constant Mask with all bits set, leaves Vec unchanged;
 // any other Mask is converted with getX86MaskVec and ANDed into Vec.
1917 if (Mask) {
1918 const auto *C = dyn_cast<Constant>(Mask);
1919 if (!C || !C->isAllOnesValue())
1920 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
1921 }
1922
 // Fewer than 8 lanes: shuffle up to exactly 8 lanes so the bitcast below
 // produces an i8. Indices below NumElts keep the original lanes in order;
 // the remaining indices are all NumElts or larger, i.e. they address lanes
 // of the second shuffle operand.
1923 if (NumElts < 8) {
1924 int Indices[8];
1925 for (unsigned i = 0; i != NumElts; ++i)
1926 Indices[i] = i;
1927 for (unsigned i = NumElts; i != 8; ++i)
1928 Indices[i] = NumElts + i % NumElts;
1929 Vec = Builder.CreateShuffleVector(Vec,
1931 Indices);
1932 }
 // Reinterpret the i1 lanes as one integer of max(NumElts, 8) bits.
1933 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
1934}
1935
1937 unsigned CC, bool Signed) {
1938 Value *Op0 = CI.getArgOperand(0);
1939 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
1940
 // CC 3 and CC 7 are special-cased to a <NumElts x i1> value. Every other
 // accepted code (0, 1, 2, 4, 5, 6) becomes an icmp of operands 0 and 1, with
 // Signed choosing between the signed and unsigned ordering predicates. Any
 // remaining code is treated as unreachable.
1941 Value *Cmp;
1942 if (CC == 3) {
1944 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1945 } else if (CC == 7) {
1947 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
1948 } else {
1950 switch (CC) {
1951 default: llvm_unreachable("Unknown condition code");
1952 case 0: Pred = ICmpInst::ICMP_EQ; break;
1953 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
1954 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
1955 case 4: Pred = ICmpInst::ICMP_NE; break;
1956 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
1957 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
1958 }
1959 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
1960 }
1961
 // The mask is always the call's last argument.
1962 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
1963
 // Apply the mask and convert the i1 vector to its integer form.
1964 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
1965}
1966
1967// Replace a masked intrinsic with an older unmasked intrinsic.
1969 Intrinsic::ID IID) {
 // Call the unmasked intrinsic IID on the first two operands of the call.
1970 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
1971 Value *Rep = Builder.CreateCall(Intrin,
1972 { CI.getArgOperand(0), CI.getArgOperand(1) });
 // Operand 3 is passed to emitX86Select as the mask and operand 2 as its last
 // argument (presumably the value used for masked-off lanes; confirm against
 // emitX86Select).
1973 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
1974}
1975
1977 Value* A = CI.getArgOperand(0);
1978 Value* B = CI.getArgOperand(1);
1979 Value* Src = CI.getArgOperand(2);
1980 Value* Mask = CI.getArgOperand(3);
1981
 // Only bit 0 of the mask is consulted: when it is set, element 0 of B is
 // chosen, otherwise element 0 of Src.
1982 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
1983 Value* Cmp = Builder.CreateIsNotNull(AndNode);
1984 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
1985 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
1986 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
 // The result is A with its element 0 replaced by the chosen scalar.
1987 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
1988}
1989
1991 Value* Op = CI.getArgOperand(0);
1992 Type* ReturnOp = CI.getType();
1993 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
 // Convert operand 0 with getX86MaskVec, using one lane per element of the
 // call's result type, then sign-extend that to the result type.
1994 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
1995 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1996}
1997
1998// Replace intrinsic with unmasked version and a select.
2000 CallBase &CI, Value *&Rep) {
 // Maps the masked intrinsic name (after the "avx512.mask." prefix) and the
 // call's result type to an unmasked intrinsic ID. On a match, the replacement
 // value is stored in Rep and true is returned; for any other name the
 // function returns false without touching Rep.
2001 Name = Name.substr(12); // Remove avx512.mask.
2002
 // Total width and per-element width of the call's result type, in bits. These
 // select the concrete intrinsic variant within each name family below; a
 // width combination that a family does not list is treated as unreachable.
2003 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2004 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2005 Intrinsic::ID IID;
2006 if (Name.starts_with("max.p")) {
2007 if (VecWidth == 128 && EltWidth == 32)
2008 IID = Intrinsic::x86_sse_max_ps;
2009 else if (VecWidth == 128 && EltWidth == 64)
2010 IID = Intrinsic::x86_sse2_max_pd;
2011 else if (VecWidth == 256 && EltWidth == 32)
2012 IID = Intrinsic::x86_avx_max_ps_256;
2013 else if (VecWidth == 256 && EltWidth == 64)
2014 IID = Intrinsic::x86_avx_max_pd_256;
2015 else
2016 llvm_unreachable("Unexpected intrinsic");
2017 } else if (Name.starts_with("min.p")) {
2018 if (VecWidth == 128 && EltWidth == 32)
2019 IID = Intrinsic::x86_sse_min_ps;
2020 else if (VecWidth == 128 && EltWidth == 64)
2021 IID = Intrinsic::x86_sse2_min_pd;
2022 else if (VecWidth == 256 && EltWidth == 32)
2023 IID = Intrinsic::x86_avx_min_ps_256;
2024 else if (VecWidth == 256 && EltWidth == 64)
2025 IID = Intrinsic::x86_avx_min_pd_256;
2026 else
2027 llvm_unreachable("Unexpected intrinsic");
2028 } else if (Name.starts_with("pshuf.b.")) {
2029 if (VecWidth == 128)
2030 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2031 else if (VecWidth == 256)
2032 IID = Intrinsic::x86_avx2_pshuf_b;
2033 else if (VecWidth == 512)
2034 IID = Intrinsic::x86_avx512_pshuf_b_512;
2035 else
2036 llvm_unreachable("Unexpected intrinsic");
2037 } else if (Name.starts_with("pmul.hr.sw.")) {
2038 if (VecWidth == 128)
2039 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2040 else if (VecWidth == 256)
2041 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2042 else if (VecWidth == 512)
2043 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2044 else
2045 llvm_unreachable("Unexpected intrinsic");
2046 } else if (Name.starts_with("pmulh.w.")) {
2047 if (VecWidth == 128)
2048 IID = Intrinsic::x86_sse2_pmulh_w;
2049 else if (VecWidth == 256)
2050 IID = Intrinsic::x86_avx2_pmulh_w;
2051 else if (VecWidth == 512)
2052 IID = Intrinsic::x86_avx512_pmulh_w_512;
2053 else
2054 llvm_unreachable("Unexpected intrinsic");
2055 } else if (Name.starts_with("pmulhu.w.")) {
2056 if (VecWidth == 128)
2057 IID = Intrinsic::x86_sse2_pmulhu_w;
2058 else if (VecWidth == 256)
2059 IID = Intrinsic::x86_avx2_pmulhu_w;
2060 else if (VecWidth == 512)
2061 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2062 else
2063 llvm_unreachable("Unexpected intrinsic");
2064 } else if (Name.starts_with("pmaddw.d.")) {
2065 if (VecWidth == 128)
2066 IID = Intrinsic::x86_sse2_pmadd_wd;
2067 else if (VecWidth == 256)
2068 IID = Intrinsic::x86_avx2_pmadd_wd;
2069 else if (VecWidth == 512)
2070 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2071 else
2072 llvm_unreachable("Unexpected intrinsic");
2073 } else if (Name.starts_with("pmaddubs.w.")) {
2074 if (VecWidth == 128)
2075 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2076 else if (VecWidth == 256)
2077 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2078 else if (VecWidth == 512)
2079 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2080 else
2081 llvm_unreachable("Unexpected intrinsic");
2082 } else if (Name.starts_with("packsswb.")) {
2083 if (VecWidth == 128)
2084 IID = Intrinsic::x86_sse2_packsswb_128;
2085 else if (VecWidth == 256)
2086 IID = Intrinsic::x86_avx2_packsswb;
2087 else if (VecWidth == 512)
2088 IID = Intrinsic::x86_avx512_packsswb_512;
2089 else
2090 llvm_unreachable("Unexpected intrinsic");
2091 } else if (Name.starts_with("packssdw.")) {
2092 if (VecWidth == 128)
2093 IID = Intrinsic::x86_sse2_packssdw_128;
2094 else if (VecWidth == 256)
2095 IID = Intrinsic::x86_avx2_packssdw;
2096 else if (VecWidth == 512)
2097 IID = Intrinsic::x86_avx512_packssdw_512;
2098 else
2099 llvm_unreachable("Unexpected intrinsic");
2100 } else if (Name.starts_with("packuswb.")) {
2101 if (VecWidth == 128)
2102 IID = Intrinsic::x86_sse2_packuswb_128;
2103 else if (VecWidth == 256)
2104 IID = Intrinsic::x86_avx2_packuswb;
2105 else if (VecWidth == 512)
2106 IID = Intrinsic::x86_avx512_packuswb_512;
2107 else
2108 llvm_unreachable("Unexpected intrinsic");
2109 } else if (Name.starts_with("packusdw.")) {
2110 if (VecWidth == 128)
2111 IID = Intrinsic::x86_sse41_packusdw;
2112 else if (VecWidth == 256)
2113 IID = Intrinsic::x86_avx2_packusdw;
2114 else if (VecWidth == 512)
2115 IID = Intrinsic::x86_avx512_packusdw_512;
2116 else
2117 llvm_unreachable("Unexpected intrinsic");
2118 } else if (Name.starts_with("vpermilvar.")) {
2119 if (VecWidth == 128 && EltWidth == 32)
2120 IID = Intrinsic::x86_avx_vpermilvar_ps;
2121 else if (VecWidth == 128 && EltWidth == 64)
2122 IID = Intrinsic::x86_avx_vpermilvar_pd;
2123 else if (VecWidth == 256 && EltWidth == 32)
2124 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2125 else if (VecWidth == 256 && EltWidth == 64)
2126 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2127 else if (VecWidth == 512 && EltWidth == 32)
2128 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2129 else if (VecWidth == 512 && EltWidth == 64)
2130 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2131 else
2132 llvm_unreachable("Unexpected intrinsic");
2133 } else if (Name == "cvtpd2dq.256") {
2134 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2135 } else if (Name == "cvtpd2ps.256") {
2136 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2137 } else if (Name == "cvttpd2dq.256") {
2138 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2139 } else if (Name == "cvttps2dq.128") {
2140 IID = Intrinsic::x86_sse2_cvttps2dq;
2141 } else if (Name == "cvttps2dq.256") {
2142 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2143 } else if (Name.starts_with("permvar.")) {
 // 32- and 64-bit elements additionally distinguish floating-point from
 // integer result types; 16- and 8-bit elements do not.
2144 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2145 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2146 IID = Intrinsic::x86_avx2_permps;
2147 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2148 IID = Intrinsic::x86_avx2_permd;
2149 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2150 IID = Intrinsic::x86_avx512_permvar_df_256;
2151 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2152 IID = Intrinsic::x86_avx512_permvar_di_256;
2153 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2154 IID = Intrinsic::x86_avx512_permvar_sf_512;
2155 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2156 IID = Intrinsic::x86_avx512_permvar_si_512;
2157 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2158 IID = Intrinsic::x86_avx512_permvar_df_512;
2159 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2160 IID = Intrinsic::x86_avx512_permvar_di_512;
2161 else if (VecWidth == 128 && EltWidth == 16)
2162 IID = Intrinsic::x86_avx512_permvar_hi_128;
2163 else if (VecWidth == 256 && EltWidth == 16)
2164 IID = Intrinsic::x86_avx512_permvar_hi_256;
2165 else if (VecWidth == 512 && EltWidth == 16)
2166 IID = Intrinsic::x86_avx512_permvar_hi_512;
2167 else if (VecWidth == 128 && EltWidth == 8)
2168 IID = Intrinsic::x86_avx512_permvar_qi_128;
2169 else if (VecWidth == 256 && EltWidth == 8)
2170 IID = Intrinsic::x86_avx512_permvar_qi_256;
2171 else if (VecWidth == 512 && EltWidth == 8)
2172 IID = Intrinsic::x86_avx512_permvar_qi_512;
2173 else
2174 llvm_unreachable("Unexpected intrinsic");
2175 } else if (Name.starts_with("dbpsadbw.")) {
2176 if (VecWidth == 128)
2177 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2178 else if (VecWidth == 256)
2179 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2180 else if (VecWidth == 512)
2181 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2182 else
2183 llvm_unreachable("Unexpected intrinsic");
2184 } else if (Name.starts_with("pmultishift.qb.")) {
2185 if (VecWidth == 128)
2186 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2187 else if (VecWidth == 256)
2188 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2189 else if (VecWidth == 512)
2190 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2191 else
2192 llvm_unreachable("Unexpected intrinsic");
2193 } else if (Name.starts_with("conflict.")) {
 // Name[9] is the character right after "conflict." ('d' or 'q').
2194 if (Name[9] == 'd' && VecWidth == 128)
2195 IID = Intrinsic::x86_avx512_conflict_d_128;
2196 else if (Name[9] == 'd' && VecWidth == 256)
2197 IID = Intrinsic::x86_avx512_conflict_d_256;
2198 else if (Name[9] == 'd' && VecWidth == 512)
2199 IID = Intrinsic::x86_avx512_conflict_d_512;
2200 else if (Name[9] == 'q' && VecWidth == 128)
2201 IID = Intrinsic::x86_avx512_conflict_q_128;
2202 else if (Name[9] == 'q' && VecWidth == 256)
2203 IID = Intrinsic::x86_avx512_conflict_q_256;
2204 else if (Name[9] == 'q' && VecWidth == 512)
2205 IID = Intrinsic::x86_avx512_conflict_q_512;
2206 else
2207 llvm_unreachable("Unexpected intrinsic");
2208 } else if (Name.starts_with("pavg.")) {
 // Name[5] is the character right after "pavg." ('b' or 'w').
2209 if (Name[5] == 'b' && VecWidth == 128)
2210 IID = Intrinsic::x86_sse2_pavg_b;
2211 else if (Name[5] == 'b' && VecWidth == 256)
2212 IID = Intrinsic::x86_avx2_pavg_b;
2213 else if (Name[5] == 'b' && VecWidth == 512)
2214 IID = Intrinsic::x86_avx512_pavg_b_512;
2215 else if (Name[5] == 'w' && VecWidth == 128)
2216 IID = Intrinsic::x86_sse2_pavg_w;
2217 else if (Name[5] == 'w' && VecWidth == 256)
2218 IID = Intrinsic::x86_avx2_pavg_w;
2219 else if (Name[5] == 'w' && VecWidth == 512)
2220 IID = Intrinsic::x86_avx512_pavg_w_512;
2221 else
2222 llvm_unreachable("Unexpected intrinsic");
2223 } else
2224 return false;
2225
 // Call the unmasked intrinsic with every operand except the last two.
2226 SmallVector<Value *, 4> Args(CI.args());
2227 Args.pop_back();
2228 Args.pop_back();
2229 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
2230 Args);
 // The two dropped operands feed the select: the last one as the mask, the
 // one before it as emitX86Select's final argument.
2231 unsigned NumArgs = CI.arg_size();
2232 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2233 CI.getArgOperand(NumArgs - 2));
2234 return true;
2235}
2236
/// Upgrade the comment marker in an inline asm string that represents an objc
/// retain/release marker.
///
/// The string is rewritten in place only when all of the following hold: it
/// begins with "mov\tfp", it contains "objc_retainAutoreleaseReturnValue", and
/// it contains "# marker". In that case the '#' of the first "# marker"
/// occurrence is replaced with ';'. Otherwise the string is left unchanged.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  std::string &Asm = *AsmStr;

  // Must start with the frame-pointer move.
  if (Asm.rfind("mov\tfp", 0) != 0)
    return;

  // Must mention the runtime entry point somewhere.
  if (Asm.find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  // Must carry the old-style marker comment.
  const size_t MarkerPos = Asm.find("# marker");
  if (MarkerPos == std::string::npos)
    return;

  // Swap the single leading '#' for ';'.
  Asm.replace(MarkerPos, 1, ";");
}
2247
2249 IRBuilder<> &Builder) {
2250 LLVMContext &C = F->getContext();
2251 Value *Rep = nullptr;
2252
2253 if (Name.starts_with("sse4a.movnt.")) {
2255 Elts.push_back(
2256 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2257 MDNode *Node = MDNode::get(C, Elts);
2258
2259 Value *Arg0 = CI->getArgOperand(0);
2260 Value *Arg1 = CI->getArgOperand(1);
2261
2262 // Nontemporal (unaligned) store of the 0'th element of the float/double
2263 // vector.
2264 Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
2265 PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
2266 Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
2267 Value *Extract =
2268 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2269
2270 StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
2271 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2272 } else if (Name.starts_with("avx.movnt.") ||
2273 Name.starts_with("avx512.storent.")) {
2275 Elts.push_back(
2276 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2277 MDNode *Node = MDNode::get(C, Elts);
2278
2279 Value *Arg0 = CI->getArgOperand(0);
2280 Value *Arg1 = CI->getArgOperand(1);
2281
2282 // Convert the type of the pointer to a pointer to the stored type.
2283 Value *BC = Builder.CreateBitCast(
2284 Arg0, PointerType::getUnqual(Arg1->getType()), "cast");
2285 StoreInst *SI = Builder.CreateAlignedStore(
2286 Arg1, BC,
2288 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2289 } else if (Name == "sse2.storel.dq") {
2290 Value *Arg0 = CI->getArgOperand(0);
2291 Value *Arg1 = CI->getArgOperand(1);
2292
2293 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2294 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2295 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2296 Value *BC = Builder.CreateBitCast(
2297 Arg0, PointerType::getUnqual(Elt->getType()), "cast");
2298 Builder.CreateAlignedStore(Elt, BC, Align(1));
2299 } else if (Name.starts_with("sse.storeu.") ||
2300 Name.starts_with("sse2.storeu.") ||
2301 Name.starts_with("avx.storeu.")) {
2302 Value *Arg0 = CI->getArgOperand(0);
2303 Value *Arg1 = CI->getArgOperand(1);
2304
2305 Arg0 = Builder.CreateBitCast(Arg0, PointerType::getUnqual(Arg1->getType()),
2306 "cast");
2307 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2308 } else if (Name == "avx512.mask.store.ss") {
2309 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2310 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2311 Mask, false);
2312 } else if (Name.starts_with("avx512.mask.store")) {
2313 // "avx512.mask.storeu." or "avx512.mask.store."
2314 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2315 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2316 CI->getArgOperand(2), Aligned);
2317 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2318 // Upgrade packed integer vector compare intrinsics to compare instructions.
2319 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2320 bool CmpEq = Name[9] == 'e';
2321 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2322 CI->getArgOperand(0), CI->getArgOperand(1));
2323 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2324 } else if (Name.starts_with("avx512.broadcastm")) {
2325 Type *ExtTy = Type::getInt32Ty(C);
2326 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2327 ExtTy = Type::getInt64Ty(C);
2328 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2329 ExtTy->getPrimitiveSizeInBits();
2330 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2331 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2332 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2333 Value *Vec = CI->getArgOperand(0);
2334 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2335 Function *Intr = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sqrt,
2336 Elt0->getType());
2337 Elt0 = Builder.CreateCall(Intr, Elt0);
2338 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2339 } else if (Name.starts_with("avx.sqrt.p") ||
2340 Name.starts_with("sse2.sqrt.p") ||
2341 Name.starts_with("sse.sqrt.p")) {
2342 Rep =
2344 F->getParent(), Intrinsic::sqrt, CI->getType()),
2345 {CI->getArgOperand(0)});
2346 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2347 if (CI->arg_size() == 4 &&
2348 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2349 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2350 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2351 : Intrinsic::x86_avx512_sqrt_pd_512;
2352
2353 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2354 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
2355 Args);
2356 } else {
2357 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
2358 Intrinsic::sqrt,
2359 CI->getType()),
2360 {CI->getArgOperand(0)});
2361 }
2362 Rep =
2363 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2364 } else if (Name.starts_with("avx512.ptestm") ||
2365 Name.starts_with("avx512.ptestnm")) {
2366 Value *Op0 = CI->getArgOperand(0);
2367 Value *Op1 = CI->getArgOperand(1);
2368 Value *Mask = CI->getArgOperand(2);
2369 Rep = Builder.CreateAnd(Op0, Op1);
2370 llvm::Type *Ty = Op0->getType();
2372 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2373 ? ICmpInst::ICMP_NE
2374 : ICmpInst::ICMP_EQ;
2375 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2376 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2377 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2378 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2379 ->getNumElements();
2380 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2381 Rep =
2382 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2383 } else if (Name.starts_with("avx512.kunpck")) {
2384 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2385 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2386 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2387 int Indices[64];
2388 for (unsigned i = 0; i != NumElts; ++i)
2389 Indices[i] = i;
2390
2391 // First extract half of each vector. This gives better codegen than
2392 // doing it in a single shuffle.
2393 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2394 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2395 // Concat the vectors.
2396 // NOTE: Operands have to be swapped to match intrinsic definition.
2397 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2398 Rep = Builder.CreateBitCast(Rep, CI->getType());
2399 } else if (Name == "avx512.kand.w") {
2400 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2401 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2402 Rep = Builder.CreateAnd(LHS, RHS);
2403 Rep = Builder.CreateBitCast(Rep, CI->getType());
2404 } else if (Name == "avx512.kandn.w") {
2405 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2406 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2407 LHS = Builder.CreateNot(LHS);
2408 Rep = Builder.CreateAnd(LHS, RHS);
2409 Rep = Builder.CreateBitCast(Rep, CI->getType());
2410 } else if (Name == "avx512.kor.w") {
2411 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2412 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2413 Rep = Builder.CreateOr(LHS, RHS);
2414 Rep = Builder.CreateBitCast(Rep, CI->getType());
2415 } else if (Name == "avx512.kxor.w") {
2416 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2417 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2418 Rep = Builder.CreateXor(LHS, RHS);
2419 Rep = Builder.CreateBitCast(Rep, CI->getType());
2420 } else if (Name == "avx512.kxnor.w") {
2421 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2422 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2423 LHS = Builder.CreateNot(LHS);
2424 Rep = Builder.CreateXor(LHS, RHS);
2425 Rep = Builder.CreateBitCast(Rep, CI->getType());
2426 } else if (Name == "avx512.knot.w") {
2427 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2428 Rep = Builder.CreateNot(Rep);
2429 Rep = Builder.CreateBitCast(Rep, CI->getType());
2430 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2431 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2432 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2433 Rep = Builder.CreateOr(LHS, RHS);
2434 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2435 Value *C;
2436 if (Name[14] == 'c')
2437 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2438 else
2439 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2440 Rep = Builder.CreateICmpEQ(Rep, C);
2441 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2442 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2443 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2444 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2445 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2446 Type *I32Ty = Type::getInt32Ty(C);
2447 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2448 ConstantInt::get(I32Ty, 0));
2449 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2450 ConstantInt::get(I32Ty, 0));
2451 Value *EltOp;
2452 if (Name.contains(".add."))
2453 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2454 else if (Name.contains(".sub."))
2455 EltOp = Builder.CreateFSub(Elt0, Elt1);
2456 else if (Name.contains(".mul."))
2457 EltOp = Builder.CreateFMul(Elt0, Elt1);
2458 else
2459 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2460 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2461 ConstantInt::get(I32Ty, 0));
2462 } else if (Name.starts_with("avx512.mask.pcmp")) {
2463 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2464 bool CmpEq = Name[16] == 'e';
2465 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2466 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2467 Type *OpTy = CI->getArgOperand(0)->getType();
2468 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2469 Intrinsic::ID IID;
2470 switch (VecWidth) {
2471 default:
2472 llvm_unreachable("Unexpected intrinsic");
2473 case 128:
2474 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
2475 break;
2476 case 256:
2477 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
2478 break;
2479 case 512:
2480 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
2481 break;
2482 }
2483
2484 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2485 {CI->getOperand(0), CI->getArgOperand(1)});
2486 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2487 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
2488 Type *OpTy = CI->getArgOperand(0)->getType();
2489 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2490 unsigned EltWidth = OpTy->getScalarSizeInBits();
2491 Intrinsic::ID IID;
2492 if (VecWidth == 128 && EltWidth == 32)
2493 IID = Intrinsic::x86_avx512_fpclass_ps_128;
2494 else if (VecWidth == 256 && EltWidth == 32)
2495 IID = Intrinsic::x86_avx512_fpclass_ps_256;
2496 else if (VecWidth == 512 && EltWidth == 32)
2497 IID = Intrinsic::x86_avx512_fpclass_ps_512;
2498 else if (VecWidth == 128 && EltWidth == 64)
2499 IID = Intrinsic::x86_avx512_fpclass_pd_128;
2500 else if (VecWidth == 256 && EltWidth == 64)
2501 IID = Intrinsic::x86_avx512_fpclass_pd_256;
2502 else if (VecWidth == 512 && EltWidth == 64)
2503 IID = Intrinsic::x86_avx512_fpclass_pd_512;
2504 else
2505 llvm_unreachable("Unexpected intrinsic");
2506
2507 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2508 {CI->getOperand(0), CI->getArgOperand(1)});
2509 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
2510 } else if (Name.starts_with("avx512.cmp.p")) {
2511 SmallVector<Value *, 4> Args(CI->args());
2512 Type *OpTy = Args[0]->getType();
2513 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2514 unsigned EltWidth = OpTy->getScalarSizeInBits();
2515 Intrinsic::ID IID;
2516 if (VecWidth == 128 && EltWidth == 32)
2517 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
2518 else if (VecWidth == 256 && EltWidth == 32)
2519 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
2520 else if (VecWidth == 512 && EltWidth == 32)
2521 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
2522 else if (VecWidth == 128 && EltWidth == 64)
2523 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
2524 else if (VecWidth == 256 && EltWidth == 64)
2525 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
2526 else if (VecWidth == 512 && EltWidth == 64)
2527 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
2528 else
2529 llvm_unreachable("Unexpected intrinsic");
2530
2532 if (VecWidth == 512)
2533 std::swap(Mask, Args.back());
2534 Args.push_back(Mask);
2535
2536 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2537 Args);
2538 } else if (Name.starts_with("avx512.mask.cmp.")) {
2539 // Integer compare intrinsics.
2540 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2541 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
2542 } else if (Name.starts_with("avx512.mask.ucmp.")) {
2543 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2544 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
2545 } else if (Name.starts_with("avx512.cvtb2mask.") ||
2546 Name.starts_with("avx512.cvtw2mask.") ||
2547 Name.starts_with("avx512.cvtd2mask.") ||
2548 Name.starts_with("avx512.cvtq2mask.")) {
2549 Value *Op = CI->getArgOperand(0);
2550 Value *Zero = llvm::Constant::getNullValue(Op->getType());
2551 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
2552 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
2553 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
2554 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
2555 Name.starts_with("avx512.mask.pabs")) {
2556 Rep = upgradeAbs(Builder, *CI);
2557 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
2558 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
2559 Name.starts_with("avx512.mask.pmaxs")) {
2560 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
2561 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
2562 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
2563 Name.starts_with("avx512.mask.pmaxu")) {
2564 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
2565 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
2566 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
2567 Name.starts_with("avx512.mask.pmins")) {
2568 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
2569 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
2570 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
2571 Name.starts_with("avx512.mask.pminu")) {
2572 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
2573 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
2574 Name == "avx512.pmulu.dq.512" ||
2575 Name.starts_with("avx512.mask.pmulu.dq.")) {
2576 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
2577 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
2578 Name == "avx512.pmul.dq.512" ||
2579 Name.starts_with("avx512.mask.pmul.dq.")) {
2580 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
2581 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
2582 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
2583 Rep =
2584 Builder.CreateSIToFP(CI->getArgOperand(1),
2585 cast<VectorType>(CI->getType())->getElementType());
2586 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2587 } else if (Name == "avx512.cvtusi2sd") {
2588 Rep =
2589 Builder.CreateUIToFP(CI->getArgOperand(1),
2590 cast<VectorType>(CI->getType())->getElementType());
2591 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2592 } else if (Name == "sse2.cvtss2sd") {
2593 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2594 Rep = Builder.CreateFPExt(
2595 Rep, cast<VectorType>(CI->getType())->getElementType());
2596 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2597 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
2598 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
2599 Name.starts_with("avx512.mask.cvtdq2pd.") ||
2600 Name.starts_with("avx512.mask.cvtudq2pd.") ||
2601 Name.starts_with("avx512.mask.cvtdq2ps.") ||
2602 Name.starts_with("avx512.mask.cvtudq2ps.") ||
2603 Name.starts_with("avx512.mask.cvtqq2pd.") ||
2604 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
2605 Name == "avx512.mask.cvtqq2ps.256" ||
2606 Name == "avx512.mask.cvtqq2ps.512" ||
2607 Name == "avx512.mask.cvtuqq2ps.256" ||
2608 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
2609 Name == "avx.cvt.ps2.pd.256" ||
2610 Name == "avx512.mask.cvtps2pd.128" ||
2611 Name == "avx512.mask.cvtps2pd.256") {
2612 auto *DstTy = cast<FixedVectorType>(CI->getType());
2613 Rep = CI->getArgOperand(0);
2614 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2615
2616 unsigned NumDstElts = DstTy->getNumElements();
2617 if (NumDstElts < SrcTy->getNumElements()) {
2618 assert(NumDstElts == 2 && "Unexpected vector size");
2619 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
2620 }
2621
2622 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
2623 bool IsUnsigned = Name.contains("cvtu");
2624 if (IsPS2PD)
2625 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
2626 else if (CI->arg_size() == 4 &&
2627 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2628 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2629 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2630 : Intrinsic::x86_avx512_sitofp_round;
2631 Function *F =
2632 Intrinsic::getDeclaration(CI->getModule(), IID, {DstTy, SrcTy});
2633 Rep = Builder.CreateCall(F, {Rep, CI->getArgOperand(3)});
2634 } else {
2635 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2636 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2637 }
2638
2639 if (CI->arg_size() >= 3)
2640 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2641 CI->getArgOperand(1));
2642 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
2643 Name.starts_with("vcvtph2ps.")) {
2644 auto *DstTy = cast<FixedVectorType>(CI->getType());
2645 Rep = CI->getArgOperand(0);
2646 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
2647 unsigned NumDstElts = DstTy->getNumElements();
2648 if (NumDstElts != SrcTy->getNumElements()) {
2649 assert(NumDstElts == 4 && "Unexpected vector size");
2650 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
2651 }
2652 Rep = Builder.CreateBitCast(
2653 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
2654 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
2655 if (CI->arg_size() >= 3)
2656 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2657 CI->getArgOperand(1));
2658 } else if (Name.starts_with("avx512.mask.load")) {
2659 // "avx512.mask.loadu." or "avx512.mask.load."
2660 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
2661 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2662 CI->getArgOperand(2), Aligned);
2663 } else if (Name.starts_with("avx512.mask.expand.load.")) {
2664 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2665 Type *PtrTy = ResultTy->getElementType();
2666
2667 // Cast the pointer to element type.
2668 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2670
2671 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2672 ResultTy->getNumElements());
2673
2675 F->getParent(), Intrinsic::masked_expandload, ResultTy);
2676 Rep = Builder.CreateCall(ELd, {Ptr, MaskVec, CI->getOperand(1)});
2677 } else if (Name.starts_with("avx512.mask.compress.store.")) {
2678 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
2679 Type *PtrTy = ResultTy->getElementType();
2680
2681 // Cast the pointer to element type.
2682 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2684
2685 Value *MaskVec =
2686 getX86MaskVec(Builder, CI->getArgOperand(2),
2687 cast<FixedVectorType>(ResultTy)->getNumElements());
2688
2690 F->getParent(), Intrinsic::masked_compressstore, ResultTy);
2691 Rep = Builder.CreateCall(CSt, {CI->getArgOperand(1), Ptr, MaskVec});
2692 } else if (Name.starts_with("avx512.mask.compress.") ||
2693 Name.starts_with("avx512.mask.expand.")) {
2694 auto *ResultTy = cast<FixedVectorType>(CI->getType());
2695
2696 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2697 ResultTy->getNumElements());
2698
2699 bool IsCompress = Name[12] == 'c';
2700 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2701 : Intrinsic::x86_avx512_mask_expand;
2702 Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2703 Rep = Builder.CreateCall(Intr,
2704 {CI->getOperand(0), CI->getOperand(1), MaskVec});
2705 } else if (Name.starts_with("xop.vpcom")) {
2706 bool IsSigned;
2707 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
2708 Name.ends_with("uq"))
2709 IsSigned = false;
2710 else if (Name.ends_with("b") || Name.ends_with("w") ||
2711 Name.ends_with("d") || Name.ends_with("q"))
2712 IsSigned = true;
2713 else
2714 llvm_unreachable("Unknown suffix");
2715
2716 unsigned Imm;
2717 if (CI->arg_size() == 3) {
2718 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2719 } else {
2720 Name = Name.substr(9); // strip off "xop.vpcom"
2721 if (Name.starts_with("lt"))
2722 Imm = 0;
2723 else if (Name.starts_with("le"))
2724 Imm = 1;
2725 else if (Name.starts_with("gt"))
2726 Imm = 2;
2727 else if (Name.starts_with("ge"))
2728 Imm = 3;
2729 else if (Name.starts_with("eq"))
2730 Imm = 4;
2731 else if (Name.starts_with("ne"))
2732 Imm = 5;
2733 else if (Name.starts_with("false"))
2734 Imm = 6;
2735 else if (Name.starts_with("true"))
2736 Imm = 7;
2737 else
2738 llvm_unreachable("Unknown condition");
2739 }
2740
2741 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
2742 } else if (Name.starts_with("xop.vpcmov")) {
2743 Value *Sel = CI->getArgOperand(2);
2744 Value *NotSel = Builder.CreateNot(Sel);
2745 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
2746 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
2747 Rep = Builder.CreateOr(Sel0, Sel1);
2748 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
2749 Name.starts_with("avx512.mask.prol")) {
2750 Rep = upgradeX86Rotate(Builder, *CI, false);
2751 } else if (Name.starts_with("avx512.pror") ||
2752 Name.starts_with("avx512.mask.pror")) {
2753 Rep = upgradeX86Rotate(Builder, *CI, true);
2754 } else if (Name.starts_with("avx512.vpshld.") ||
2755 Name.starts_with("avx512.mask.vpshld") ||
2756 Name.starts_with("avx512.maskz.vpshld")) {
2757 bool ZeroMask = Name[11] == 'z';
2758 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2759 } else if (Name.starts_with("avx512.vpshrd.") ||
2760 Name.starts_with("avx512.mask.vpshrd") ||
2761 Name.starts_with("avx512.maskz.vpshrd")) {
2762 bool ZeroMask = Name[11] == 'z';
2763 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
2764 } else if (Name == "sse42.crc32.64.8") {
2766 F->getParent(), Intrinsic::x86_sse42_crc32_32_8);
2767 Value *Trunc0 =
2768 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
2769 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
2770 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
2771 } else if (Name.starts_with("avx.vbroadcast.s") ||
2772 Name.starts_with("avx512.vbroadcast.s")) {
2773 // Replace broadcasts with a series of insertelements.
2774 auto *VecTy = cast<FixedVectorType>(CI->getType());
2775 Type *EltTy = VecTy->getElementType();
2776 unsigned EltNum = VecTy->getNumElements();
2777 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
2778 Type *I32Ty = Type::getInt32Ty(C);
2779 Rep = PoisonValue::get(VecTy);
2780 for (unsigned I = 0; I < EltNum; ++I)
2781 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
2782 } else if (Name.starts_with("sse41.pmovsx") ||
2783 Name.starts_with("sse41.pmovzx") ||
2784 Name.starts_with("avx2.pmovsx") ||
2785 Name.starts_with("avx2.pmovzx") ||
2786 Name.starts_with("avx512.mask.pmovsx") ||
2787 Name.starts_with("avx512.mask.pmovzx")) {
2788 auto *DstTy = cast<FixedVectorType>(CI->getType());
2789 unsigned NumDstElts = DstTy->getNumElements();
2790
2791 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
2792 SmallVector<int, 8> ShuffleMask(NumDstElts);
2793 for (unsigned i = 0; i != NumDstElts; ++i)
2794 ShuffleMask[i] = i;
2795
2796 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
2797
2798 bool DoSext = Name.contains("pmovsx");
2799 Rep =
2800 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
2801 // If there are 3 arguments, it's a masked intrinsic so we need a select.
2802 if (CI->arg_size() == 3)
2803 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2804 CI->getArgOperand(1));
2805 } else if (Name == "avx512.mask.pmov.qd.256" ||
2806 Name == "avx512.mask.pmov.qd.512" ||
2807 Name == "avx512.mask.pmov.wb.256" ||
2808 Name == "avx512.mask.pmov.wb.512") {
2809 Type *Ty = CI->getArgOperand(1)->getType();
2810 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2811 Rep =
2812 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2813 } else if (Name.starts_with("avx.vbroadcastf128") ||
2814 Name == "avx2.vbroadcasti128") {
2815 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
2816 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
2817 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
2818 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
2819 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
2820 PointerType::getUnqual(VT));
2821 Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
2822 if (NumSrcElts == 2)
2823 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
2824 else
2825 Rep = Builder.CreateShuffleVector(Load,
2826 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
2827 } else if (Name.starts_with("avx512.mask.shuf.i") ||
2828 Name.starts_with("avx512.mask.shuf.f")) {
2829 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2830 Type *VT = CI->getType();
2831 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
2832 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
2833 unsigned ControlBitsMask = NumLanes - 1;
2834 unsigned NumControlBits = NumLanes / 2;
2835 SmallVector<int, 8> ShuffleMask(0);
2836
2837 for (unsigned l = 0; l != NumLanes; ++l) {
2838 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
2839 // We actually need the other source.
2840 if (l >= NumLanes / 2)
2841 LaneMask += NumLanes;
2842 for (unsigned i = 0; i != NumElementsInLane; ++i)
2843 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
2844 }
2845 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2846 CI->getArgOperand(1), ShuffleMask);
2847 Rep =
2848 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
2849 } else if (Name.starts_with("avx512.mask.broadcastf") ||
2850 Name.starts_with("avx512.mask.broadcasti")) {
2851 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
2852 ->getNumElements();
2853 unsigned NumDstElts =
2854 cast<FixedVectorType>(CI->getType())->getNumElements();
2855
2856 SmallVector<int, 8> ShuffleMask(NumDstElts);
2857 for (unsigned i = 0; i != NumDstElts; ++i)
2858 ShuffleMask[i] = i % NumSrcElts;
2859
2860 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
2861 CI->getArgOperand(0), ShuffleMask);
2862 Rep =
2863 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2864 } else if (Name.starts_with("avx2.pbroadcast") ||
2865 Name.starts_with("avx2.vbroadcast") ||
2866 Name.starts_with("avx512.pbroadcast") ||
2867 Name.starts_with("avx512.mask.broadcast.s")) {
2868 // Replace vp?broadcasts with a vector shuffle.
2869 Value *Op = CI->getArgOperand(0);
2870 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
2871 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
2874 Rep = Builder.CreateShuffleVector(Op, M);
2875
2876 if (CI->arg_size() == 3)
2877 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
2878 CI->getArgOperand(1));
2879 } else if (Name.starts_with("sse2.padds.") ||
2880 Name.starts_with("avx2.padds.") ||
2881 Name.starts_with("avx512.padds.") ||
2882 Name.starts_with("avx512.mask.padds.")) {
2883 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
2884 } else if (Name.starts_with("sse2.psubs.") ||
2885 Name.starts_with("avx2.psubs.") ||
2886 Name.starts_with("avx512.psubs.") ||
2887 Name.starts_with("avx512.mask.psubs.")) {
2888 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
2889 } else if (Name.starts_with("sse2.paddus.") ||
2890 Name.starts_with("avx2.paddus.") ||
2891 Name.starts_with("avx512.mask.paddus.")) {
2892 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
2893 } else if (Name.starts_with("sse2.psubus.") ||
2894 Name.starts_with("avx2.psubus.") ||
2895 Name.starts_with("avx512.mask.psubus.")) {
2896 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
2897 } else if (Name.starts_with("avx512.mask.palignr.")) {
2898 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
2899 CI->getArgOperand(1), CI->getArgOperand(2),
2900 CI->getArgOperand(3), CI->getArgOperand(4),
2901 false);
2902 } else if (Name.starts_with("avx512.mask.valign.")) {
2904 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2905 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
2906 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
2907 // 128/256-bit shift left specified in bits.
2908 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2909 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
2910 Shift / 8); // Shift is in bits.
2911 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
2912 // 128/256-bit shift right specified in bits.
2913 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2914 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
2915 Shift / 8); // Shift is in bits.
2916 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
2917 Name == "avx512.psll.dq.512") {
2918 // 128/256/512-bit shift left specified in bytes.
2919 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2920 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2921 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
2922 Name == "avx512.psrl.dq.512") {
2923 // 128/256/512-bit shift right specified in bytes.
2924 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2925 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
2926 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
2927 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
2928 Name.starts_with("avx2.pblendd.")) {
2929 Value *Op0 = CI->getArgOperand(0);
2930 Value *Op1 = CI->getArgOperand(1);
2931 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2932 auto *VecTy = cast<FixedVectorType>(CI->getType());
2933 unsigned NumElts = VecTy->getNumElements();
2934
2935 SmallVector<int, 16> Idxs(NumElts);
2936 for (unsigned i = 0; i != NumElts; ++i)
2937 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
2938
2939 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
2940 } else if (Name.starts_with("avx.vinsertf128.") ||
2941 Name == "avx2.vinserti128" ||
2942 Name.starts_with("avx512.mask.insert")) {
2943 Value *Op0 = CI->getArgOperand(0);
2944 Value *Op1 = CI->getArgOperand(1);
2945 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
2946 unsigned DstNumElts =
2947 cast<FixedVectorType>(CI->getType())->getNumElements();
2948 unsigned SrcNumElts =
2949 cast<FixedVectorType>(Op1->getType())->getNumElements();
2950 unsigned Scale = DstNumElts / SrcNumElts;
2951
2952 // Mask off the high bits of the immediate value; hardware ignores those.
2953 Imm = Imm % Scale;
2954
2955 // Extend the second operand into a vector the size of the destination.
2956 SmallVector<int, 8> Idxs(DstNumElts);
2957 for (unsigned i = 0; i != SrcNumElts; ++i)
2958 Idxs[i] = i;
2959 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
2960 Idxs[i] = SrcNumElts;
2961 Rep = Builder.CreateShuffleVector(Op1, Idxs);
2962
2963 // Insert the second operand into the first operand.
2964
2965 // Note that there is no guarantee that instruction lowering will actually
2966 // produce a vinsertf128 instruction for the created shuffles. In
2967 // particular, the 0 immediate case involves no lane changes, so it can
2968 // be handled as a blend.
2969
2970 // Example of shuffle mask for 32-bit elements:
2971 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
2972 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
2973
2974 // First fill with identity mask.
2975 for (unsigned i = 0; i != DstNumElts; ++i)
2976 Idxs[i] = i;
2977 // Then replace the elements where we need to insert.
2978 for (unsigned i = 0; i != SrcNumElts; ++i)
2979 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
2980 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
2981
2982 // If the intrinsic has a mask operand, handle that.
2983 if (CI->arg_size() == 5)
2984 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
2985 CI->getArgOperand(3));
2986 } else if (Name.starts_with("avx.vextractf128.") ||
2987 Name == "avx2.vextracti128" ||
2988 Name.starts_with("avx512.mask.vextract")) {
2989 Value *Op0 = CI->getArgOperand(0);
2990 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
2991 unsigned DstNumElts =
2992 cast<FixedVectorType>(CI->getType())->getNumElements();
2993 unsigned SrcNumElts =
2994 cast<FixedVectorType>(Op0->getType())->getNumElements();
2995 unsigned Scale = SrcNumElts / DstNumElts;
2996
2997 // Mask off the high bits of the immediate value; hardware ignores those.
2998 Imm = Imm % Scale;
2999
3000 // Get indexes for the subvector of the input vector.
3001 SmallVector<int, 8> Idxs(DstNumElts);
3002 for (unsigned i = 0; i != DstNumElts; ++i) {
3003 Idxs[i] = i + (Imm * DstNumElts);
3004 }
3005 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3006
3007 // If the intrinsic has a mask operand, handle that.
3008 if (CI->arg_size() == 4)
3009 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3010 CI->getArgOperand(2));
3011 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3012 Name.starts_with("avx512.mask.perm.di.")) {
3013 Value *Op0 = CI->getArgOperand(0);
3014 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3015 auto *VecTy = cast<FixedVectorType>(CI->getType());
3016 unsigned NumElts = VecTy->getNumElements();
3017
3018 SmallVector<int, 8> Idxs(NumElts);
3019 for (unsigned i = 0; i != NumElts; ++i)
3020 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3021
3022 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3023
3024 if (CI->arg_size() == 4)
3025 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3026 CI->getArgOperand(2));
3027 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3028 // The immediate permute control byte looks like this:
3029 // [1:0] - select 128 bits from sources for low half of destination
3030 // [2] - ignore
3031 // [3] - zero low half of destination
3032 // [5:4] - select 128 bits from sources for high half of destination
3033 // [6] - ignore
3034 // [7] - zero high half of destination
3035
3036 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3037
3038 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3039 unsigned HalfSize = NumElts / 2;
3040 SmallVector<int, 8> ShuffleMask(NumElts);
3041
3042 // Determine which operand(s) are actually in use for this instruction.
3043 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3044 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3045
3046 // If needed, replace operands based on zero mask.
3047 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3048 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3049
3050 // Permute low half of result.
3051 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3052 for (unsigned i = 0; i < HalfSize; ++i)
3053 ShuffleMask[i] = StartIndex + i;
3054
3055 // Permute high half of result.
3056 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3057 for (unsigned i = 0; i < HalfSize; ++i)
3058 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3059
3060 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3061
3062 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3063 Name.starts_with("avx512.mask.vpermil.p") ||
3064 Name.starts_with("avx512.mask.pshuf.d.")) {
3065 Value *Op0 = CI->getArgOperand(0);
3066 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3067 auto *VecTy = cast<FixedVectorType>(CI->getType());
3068 unsigned NumElts = VecTy->getNumElements();
3069 // Calculate the size of each index in the immediate.
3070 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3071 unsigned IdxMask = ((1 << IdxSize) - 1);
3072
3073 SmallVector<int, 8> Idxs(NumElts);
3074 // Lookup the bits for this element, wrapping around the immediate every
3075 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3076 // to offset by the first index of each group.
3077 for (unsigned i = 0; i != NumElts; ++i)
3078 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3079
3080 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3081
3082 if (CI->arg_size() == 4)
3083 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3084 CI->getArgOperand(2));
3085 } else if (Name == "sse2.pshufl.w" ||
3086 Name.starts_with("avx512.mask.pshufl.w.")) {
3087 Value *Op0 = CI->getArgOperand(0);
3088 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3089 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3090
3091 SmallVector<int, 16> Idxs(NumElts);
3092 for (unsigned l = 0; l != NumElts; l += 8) {
3093 for (unsigned i = 0; i != 4; ++i)
3094 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3095 for (unsigned i = 4; i != 8; ++i)
3096 Idxs[i + l] = i + l;
3097 }
3098
3099 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3100
3101 if (CI->arg_size() == 4)
3102 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3103 CI->getArgOperand(2));
3104 } else if (Name == "sse2.pshufh.w" ||
3105 Name.starts_with("avx512.mask.pshufh.w.")) {
3106 Value *Op0 = CI->getArgOperand(0);
3107 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3108 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3109
3110 SmallVector<int, 16> Idxs(NumElts);
3111 for (unsigned l = 0; l != NumElts; l += 8) {
3112 for (unsigned i = 0; i != 4; ++i)
3113 Idxs[i + l] = i + l;
3114 for (unsigned i = 0; i != 4; ++i)
3115 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3116 }
3117
3118 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3119
3120 if (CI->arg_size() == 4)
3121 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3122 CI->getArgOperand(2));
3123 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3124 Value *Op0 = CI->getArgOperand(0);
3125 Value *Op1 = CI->getArgOperand(1);
3126 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3127 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3128
3129 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3130 unsigned HalfLaneElts = NumLaneElts / 2;
3131
3132 SmallVector<int, 16> Idxs(NumElts);
3133 for (unsigned i = 0; i != NumElts; ++i) {
3134 // Base index is the starting element of the lane.
3135 Idxs[i] = i - (i % NumLaneElts);
3136 // If we are half way through the lane switch to the other source.
3137 if ((i % NumLaneElts) >= HalfLaneElts)
3138 Idxs[i] += NumElts;
3139 // Now select the specific element by adding HalfLaneElts bits from
3140 // the immediate, wrapping around the immediate every 8 bits.
3141 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3142 }
3143
3144 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3145
3146 Rep =
3147 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3148 } else if (Name.starts_with("avx512.mask.movddup") ||
3149 Name.starts_with("avx512.mask.movshdup") ||
3150 Name.starts_with("avx512.mask.movsldup")) {
3151 Value *Op0 = CI->getArgOperand(0);
3152 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3153 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3154
3155 unsigned Offset = 0;
3156 if (Name.starts_with("avx512.mask.movshdup."))
3157 Offset = 1;
3158
3159 SmallVector<int, 16> Idxs(NumElts);
3160 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3161 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3162 Idxs[i + l + 0] = i + l + Offset;
3163 Idxs[i + l + 1] = i + l + Offset;
3164 }
3165
3166 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3167
3168 Rep =
3169 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3170 } else if (Name.starts_with("avx512.mask.punpckl") ||
3171 Name.starts_with("avx512.mask.unpckl.")) {
3172 Value *Op0 = CI->getArgOperand(0);
3173 Value *Op1 = CI->getArgOperand(1);
3174 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3175 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3176
3177 SmallVector<int, 64> Idxs(NumElts);
3178 for (int l = 0; l != NumElts; l += NumLaneElts)
3179 for (int i = 0; i != NumLaneElts; ++i)
3180 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3181
3182 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3183
3184 Rep =
3185 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3186 } else if (Name.starts_with("avx512.mask.punpckh") ||
3187 Name.starts_with("avx512.mask.unpckh.")) {
3188 Value *Op0 = CI->getArgOperand(0);
3189 Value *Op1 = CI->getArgOperand(1);
3190 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3191 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3192
3193 SmallVector<int, 64> Idxs(NumElts);
3194 for (int l = 0; l != NumElts; l += NumLaneElts)
3195 for (int i = 0; i != NumLaneElts; ++i)
3196 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3197
3198 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3199
3200 Rep =
3201 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3202 } else if (Name.starts_with("avx512.mask.and.") ||
3203 Name.starts_with("avx512.mask.pand.")) {
3204 VectorType *FTy = cast<VectorType>(CI->getType());
3205 VectorType *ITy = VectorType::getInteger(FTy);
3206 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3207 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3208 Rep = Builder.CreateBitCast(Rep, FTy);
3209 Rep =
3210 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3211 } else if (Name.starts_with("avx512.mask.andn.") ||
3212 Name.starts_with("avx512.mask.pandn.")) {
3213 VectorType *FTy = cast<VectorType>(CI->getType());
3214 VectorType *ITy = VectorType::getInteger(FTy);
3215 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3216 Rep = Builder.CreateAnd(Rep,
3217 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3218 Rep = Builder.CreateBitCast(Rep, FTy);
3219 Rep =
3220 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3221 } else if (Name.starts_with("avx512.mask.or.") ||
3222 Name.starts_with("avx512.mask.por.")) {
3223 VectorType *FTy = cast<VectorType>(CI->getType());
3224 VectorType *ITy = VectorType::getInteger(FTy);
3225 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3226 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3227 Rep = Builder.CreateBitCast(Rep, FTy);
3228 Rep =
3229 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3230 } else if (Name.starts_with("avx512.mask.xor.") ||
3231 Name.starts_with("avx512.mask.pxor.")) {
3232 VectorType *FTy = cast<VectorType>(CI->getType());
3233 VectorType *ITy = VectorType::getInteger(FTy);
3234 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3235 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3236 Rep = Builder.CreateBitCast(Rep, FTy);
3237 Rep =
3238 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3239 } else if (Name.starts_with("avx512.mask.padd.")) {
3240 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3241 Rep =
3242 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3243 } else if (Name.starts_with("avx512.mask.psub.")) {
3244 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3245 Rep =
3246 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3247 } else if (Name.starts_with("avx512.mask.pmull.")) {
3248 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3249 Rep =
3250 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3251 } else if (Name.starts_with("avx512.mask.add.p")) {
3252 if (Name.ends_with(".512")) {
3253 Intrinsic::ID IID;
3254 if (Name[17] == 's')
3255 IID = Intrinsic::x86_avx512_add_ps_512;
3256 else
3257 IID = Intrinsic::x86_avx512_add_pd_512;
3258
3259 Rep = Builder.CreateCall(
3260 Intrinsic::getDeclaration(F->getParent(), IID),
3261 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3262 } else {
3263 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3264 }
3265 Rep =
3266 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3267 } else if (Name.starts_with("avx512.mask.div.p")) {
3268 if (Name.ends_with(".512")) {
3269 Intrinsic::ID IID;
3270 if (Name[17] == 's')
3271 IID = Intrinsic::x86_avx512_div_ps_512;
3272 else
3273 IID = Intrinsic::x86_avx512_div_pd_512;
3274
3275 Rep = Builder.CreateCall(
3276 Intrinsic::getDeclaration(F->getParent(), IID),
3277 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3278 } else {
3279 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3280 }
3281 Rep =
3282 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3283 } else if (Name.starts_with("avx512.mask.mul.p")) {
3284 if (Name.ends_with(".512")) {
3285 Intrinsic::ID IID;
3286 if (Name[17] == 's')
3287 IID = Intrinsic::x86_avx512_mul_ps_512;
3288 else
3289 IID = Intrinsic::x86_avx512_mul_pd_512;
3290
3291 Rep = Builder.CreateCall(
3292 Intrinsic::getDeclaration(F->getParent(), IID),
3293 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3294 } else {
3295 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3296 }
3297 Rep =
3298 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3299 } else if (Name.starts_with("avx512.mask.sub.p")) {
3300 if (Name.ends_with(".512")) {
3301 Intrinsic::ID IID;
3302 if (Name[17] == 's')
3303 IID = Intrinsic::x86_avx512_sub_ps_512;
3304 else
3305 IID = Intrinsic::x86_avx512_sub_pd_512;
3306
3307 Rep = Builder.CreateCall(
3308 Intrinsic::getDeclaration(F->getParent(), IID),
3309 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3310 } else {
3311 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3312 }
3313 Rep =
3314 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3315 } else if ((Name.starts_with("avx512.mask.max.p") ||
3316 Name.starts_with("avx512.mask.min.p")) &&
3317 Name.drop_front(18) == ".512") {
3318 bool IsDouble = Name[17] == 'd';
3319 bool IsMin = Name[13] == 'i';
3320 static const Intrinsic::ID MinMaxTbl[2][2] = {
3321 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3322 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3323 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3324
3325 Rep = Builder.CreateCall(
3326 Intrinsic::getDeclaration(F->getParent(), IID),
3327 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3328 Rep =
3329 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3330 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3331 Rep =
3333 F->getParent(), Intrinsic::ctlz, CI->getType()),
3334 {CI->getArgOperand(0), Builder.getInt1(false)});
3335 Rep =
3336 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3337 } else if (Name.starts_with("avx512.mask.psll")) {
3338 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3339 bool IsVariable = Name[16] == 'v';
3340 char Size = Name[16] == '.' ? Name[17]
3341 : Name[17] == '.' ? Name[18]
3342 : Name[18] == '.' ? Name[19]
3343 : Name[20];
3344
3345 Intrinsic::ID IID;
3346 if (IsVariable && Name[17] != '.') {
3347 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3348 IID = Intrinsic::x86_avx2_psllv_q;
3349 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3350 IID = Intrinsic::x86_avx2_psllv_q_256;
3351 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3352 IID = Intrinsic::x86_avx2_psllv_d;
3353 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3354 IID = Intrinsic::x86_avx2_psllv_d_256;
3355 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3356 IID = Intrinsic::x86_avx512_psllv_w_128;
3357 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3358 IID = Intrinsic::x86_avx512_psllv_w_256;
3359 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3360 IID = Intrinsic::x86_avx512_psllv_w_512;
3361 else
3362 llvm_unreachable("Unexpected size");
3363 } else if (Name.ends_with(".128")) {
3364 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3365 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3366 : Intrinsic::x86_sse2_psll_d;
3367 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3368 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3369 : Intrinsic::x86_sse2_psll_q;
3370 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3371 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3372 : Intrinsic::x86_sse2_psll_w;
3373 else
3374 llvm_unreachable("Unexpected size");
3375 } else if (Name.ends_with(".256")) {
3376 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3377 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3378 : Intrinsic::x86_avx2_psll_d;
3379 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3380 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3381 : Intrinsic::x86_avx2_psll_q;
3382 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3383 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3384 : Intrinsic::x86_avx2_psll_w;
3385 else
3386 llvm_unreachable("Unexpected size");
3387 } else {
3388 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3389 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3390 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3391 : Intrinsic::x86_avx512_psll_d_512;
3392 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3393 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3394 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3395 : Intrinsic::x86_avx512_psll_q_512;
3396 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3397 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3398 : Intrinsic::x86_avx512_psll_w_512;
3399 else
3400 llvm_unreachable("Unexpected size");
3401 }
3402
3403 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3404 } else if (Name.starts_with("avx512.mask.psrl")) {
3405 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3406 bool IsVariable = Name[16] == 'v';
3407 char Size = Name[16] == '.' ? Name[17]
3408 : Name[17] == '.' ? Name[18]
3409 : Name[18] == '.' ? Name[19]
3410 : Name[20];
3411
3412 Intrinsic::ID IID;
3413 if (IsVariable && Name[17] != '.') {
3414 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3415 IID = Intrinsic::x86_avx2_psrlv_q;
3416 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3417 IID = Intrinsic::x86_avx2_psrlv_q_256;
3418 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3419 IID = Intrinsic::x86_avx2_psrlv_d;
3420 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3421 IID = Intrinsic::x86_avx2_psrlv_d_256;
3422 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3423 IID = Intrinsic::x86_avx512_psrlv_w_128;
3424 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3425 IID = Intrinsic::x86_avx512_psrlv_w_256;
3426 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3427 IID = Intrinsic::x86_avx512_psrlv_w_512;
3428 else
3429 llvm_unreachable("Unexpected size");
3430 } else if (Name.ends_with(".128")) {
3431 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3432 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3433 : Intrinsic::x86_sse2_psrl_d;
3434 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3435 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3436 : Intrinsic::x86_sse2_psrl_q;
3437 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3438 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3439 : Intrinsic::x86_sse2_psrl_w;
3440 else
3441 llvm_unreachable("Unexpected size");
3442 } else if (Name.ends_with(".256")) {
3443 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3444 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3445 : Intrinsic::x86_avx2_psrl_d;
3446 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3447 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3448 : Intrinsic::x86_avx2_psrl_q;
3449 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3450 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3451 : Intrinsic::x86_avx2_psrl_w;
3452 else
3453 llvm_unreachable("Unexpected size");
3454 } else {
3455 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3456 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3457 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3458 : Intrinsic::x86_avx512_psrl_d_512;
3459 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3460 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3461 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3462 : Intrinsic::x86_avx512_psrl_q_512;
3463 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
3464 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3465 : Intrinsic::x86_avx512_psrl_w_512;
3466 else
3467 llvm_unreachable("Unexpected size");
3468 }
3469
3470 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3471 } else if (Name.starts_with("avx512.mask.psra")) {
3472 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3473 bool IsVariable = Name[16] == 'v';
3474 char Size = Name[16] == '.' ? Name[17]
3475 : Name[17] == '.' ? Name[18]
3476 : Name[18] == '.' ? Name[19]
3477 : Name[20];
3478
3479 Intrinsic::ID IID;
3480 if (IsVariable && Name[17] != '.') {
3481 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3482 IID = Intrinsic::x86_avx2_psrav_d;
3483 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3484 IID = Intrinsic::x86_avx2_psrav_d_256;
3485 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3486 IID = Intrinsic::x86_avx512_psrav_w_128;
3487 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3488 IID = Intrinsic::x86_avx512_psrav_w_256;
3489 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3490 IID = Intrinsic::x86_avx512_psrav_w_512;
3491 else
3492 llvm_unreachable("Unexpected size");
3493 } else if (Name.ends_with(".128")) {
3494 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3495 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3496 : Intrinsic::x86_sse2_psra_d;
3497 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3498 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3499 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3500 : Intrinsic::x86_avx512_psra_q_128;
3501 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3502 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3503 : Intrinsic::x86_sse2_psra_w;
3504 else
3505 llvm_unreachable("Unexpected size");
3506 } else if (Name.ends_with(".256")) {
3507 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3508 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3509 : Intrinsic::x86_avx2_psra_d;
3510 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3511 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3512 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3513 : Intrinsic::x86_avx512_psra_q_256;
3514 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3515 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3516 : Intrinsic::x86_avx2_psra_w;
3517 else
3518 llvm_unreachable("Unexpected size");
3519 } else {
3520 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3521 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3522 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3523 : Intrinsic::x86_avx512_psra_d_512;
3524 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3525 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3526 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3527 : Intrinsic::x86_avx512_psra_q_512;
3528 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3529 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
3530 : Intrinsic::x86_avx512_psra_w_512;
3531 else
3532 llvm_unreachable("Unexpected size");
3533 }
3534
3535 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3536 } else if (Name.starts_with("avx512.mask.move.s")) {
3537 Rep = upgradeMaskedMove(Builder, *CI);
3538 } else if (Name.starts_with("avx512.cvtmask2")) {
3539 Rep = upgradeMaskToInt(Builder, *CI);
3540 } else if (Name.ends_with(".movntdqa")) {
3542 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
3543
3544 Value *Ptr = CI->getArgOperand(0);
3545
3546 // Convert the type of the pointer to a pointer to the stored type.
3547 Value *BC = Builder.CreateBitCast(
3548 Ptr, PointerType::getUnqual(CI->getType()), "cast");
3549 LoadInst *LI = Builder.CreateAlignedLoad(
3550 CI->getType(), BC,
3552 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
3553 Rep = LI;
3554 } else if (Name.starts_with("fma.vfmadd.") ||
3555 Name.starts_with("fma.vfmsub.") ||
3556 Name.starts_with("fma.vfnmadd.") ||
3557 Name.starts_with("fma.vfnmsub.")) {
3558 bool NegMul = Name[6] == 'n';
3559 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3560 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3561
3562 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3563 CI->getArgOperand(2)};
3564
3565 if (IsScalar) {
3566 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3567 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3568 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3569 }
3570
3571 if (NegMul && !IsScalar)
3572 Ops[0] = Builder.CreateFNeg(Ops[0]);
3573 if (NegMul && IsScalar)
3574 Ops[1] = Builder.CreateFNeg(Ops[1]);
3575 if (NegAcc)
3576 Ops[2] = Builder.CreateFNeg(Ops[2]);
3577
3579 Intrinsic::fma,
3580 Ops[0]->getType()),
3581 Ops);
3582
3583 if (IsScalar)
3584 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3585 } else if (Name.starts_with("fma4.vfmadd.s")) {
3586 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3587 CI->getArgOperand(2)};
3588
3589 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3590 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3591 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3592
3594 Intrinsic::fma,
3595 Ops[0]->getType()),
3596 Ops);
3597
3599 Rep, (uint64_t)0);
3600 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
3601 Name.starts_with("avx512.maskz.vfmadd.s") ||
3602 Name.starts_with("avx512.mask3.vfmadd.s") ||
3603 Name.starts_with("avx512.mask3.vfmsub.s") ||
3604 Name.starts_with("avx512.mask3.vfnmsub.s")) {
3605 bool IsMask3 = Name[11] == '3';
3606 bool IsMaskZ = Name[11] == 'z';
3607 // Drop the "avx512.mask[3z]." prefix (12 or 13 characters) to make it easier.
3608 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3609 bool NegMul = Name[2] == 'n';
3610 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3611
3612 Value *A = CI->getArgOperand(0);
3613 Value *B = CI->getArgOperand(1);
3614 Value *C = CI->getArgOperand(2);
3615
3616 if (NegMul && (IsMask3 || IsMaskZ))
3617 A = Builder.CreateFNeg(A);
3618 if (NegMul && !(IsMask3 || IsMaskZ))
3619 B = Builder.CreateFNeg(B);
3620 if (NegAcc)
3621 C = Builder.CreateFNeg(C);
3622
3623 A = Builder.CreateExtractElement(A, (uint64_t)0);
3624 B = Builder.CreateExtractElement(B, (uint64_t)0);
3625 C = Builder.CreateExtractElement(C, (uint64_t)0);
3626
3627 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3628 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3629 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
3630
3631 Intrinsic::ID IID;
3632 if (Name.back() == 'd')
3633 IID = Intrinsic::x86_avx512_vfmadd_f64;
3634 else
3635 IID = Intrinsic::x86_avx512_vfmadd_f32;
3636 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3637 Rep = Builder.CreateCall(FMA, Ops);
3638 } else {
3639 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3640 A->getType());
3641 Rep = Builder.CreateCall(FMA, {A, B, C});
3642 }
3643
3644 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
3645 : IsMask3 ? C
3646 : A;
3647
3648 // For Mask3 with NegAcc, we need to create a new extractelement that
3649 // avoids the negation above.
3650 if (NegAcc && IsMask3)
3651 PassThru =
3652 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
3653
3654 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
3655 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
3656 (uint64_t)0);
3657 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
3658 Name.starts_with("avx512.mask.vfnmadd.p") ||
3659 Name.starts_with("avx512.mask.vfnmsub.p") ||
3660 Name.starts_with("avx512.mask3.vfmadd.p") ||
3661 Name.starts_with("avx512.mask3.vfmsub.p") ||
3662 Name.starts_with("avx512.mask3.vfnmsub.p") ||
3663 Name.starts_with("avx512.maskz.vfmadd.p")) {
3664 bool IsMask3 = Name[11] == '3';
3665 bool IsMaskZ = Name[11] == 'z';
3666 // Drop the "avx512.mask[3z]." prefix (12 or 13 characters) to make it easier.
3667 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3668 bool NegMul = Name[2] == 'n';
3669 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3670
3671 Value *A = CI->getArgOperand(0);
3672 Value *B = CI->getArgOperand(1);
3673 Value *C = CI->getArgOperand(2);
3674
3675 if (NegMul && (IsMask3 || IsMaskZ))
3676 A = Builder.CreateFNeg(A);
3677 if (NegMul && !(IsMask3 || IsMaskZ))
3678 B = Builder.CreateFNeg(B);
3679 if (NegAcc)
3680 C = Builder.CreateFNeg(C);
3681
3682 if (CI->arg_size() == 5 &&
3683 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3684 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3685 Intrinsic::ID IID;
3686 // Check the character before ".512" in string.
3687 if (Name[Name.size() - 5] == 's')
3688 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3689 else
3690 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3691
3692 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3693 {A, B, C, CI->getArgOperand(4)});
3694 } else {
3695 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3696 A->getType());
3697 Rep = Builder.CreateCall(FMA, {A, B, C});
3698 }
3699
3700 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3701 : IsMask3 ? CI->getArgOperand(2)
3702 : CI->getArgOperand(0);
3703
3704 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3705 } else if (Name.starts_with("fma.vfmsubadd.p")) {
3706 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3707 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3708 Intrinsic::ID IID;
3709 if (VecWidth == 128 && EltWidth == 32)
3710 IID = Intrinsic::x86_fma_vfmaddsub_ps;
3711 else if (VecWidth == 256 && EltWidth == 32)
3712 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
3713 else if (VecWidth == 128 && EltWidth == 64)
3714 IID = Intrinsic::x86_fma_vfmaddsub_pd;
3715 else if (VecWidth == 256 && EltWidth == 64)
3716 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
3717 else
3718 llvm_unreachable("Unexpected intrinsic");
3719
3720 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3721 CI->getArgOperand(2)};
3722 Ops[2] = Builder.CreateFNeg(Ops[2]);
3723 Rep =
3724 Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), Ops);
3725 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
3726 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
3727 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
3728 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
3729 bool IsMask3 = Name[11] == '3';
3730 bool IsMaskZ = Name[11] == 'z';
3731 // Drop the "avx512.mask[3z]." prefix (12 or 13 characters) to make it easier.
3732 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3733 bool IsSubAdd = Name[3] == 's';
3734 if (CI->arg_size() == 5) {
3735 Intrinsic::ID IID;
3736 // Check the character before ".512" in string.
3737 if (Name[Name.size() - 5] == 's')
3738 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3739 else
3740 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3741
3742 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3743 CI->getArgOperand(2), CI->getArgOperand(4)};
3744 if (IsSubAdd)
3745 Ops[2] = Builder.CreateFNeg(Ops[2]);
3746
3747 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3748 Ops);
3749 } else {
3750 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3751
3752 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3753 CI->getArgOperand(2)};
3754
3755 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3756 Ops[0]->getType());
3757 Value *Odd = Builder.CreateCall(FMA, Ops);
3758 Ops[2] = Builder.CreateFNeg(Ops[2]);
3759 Value *Even = Builder.CreateCall(FMA, Ops);
3760
3761 if (IsSubAdd)
3762 std::swap(Even, Odd);
3763
3764 SmallVector<int, 32> Idxs(NumElts);
3765 for (int i = 0; i != NumElts; ++i)
3766 Idxs[i] = i + (i % 2) * NumElts;
3767
3768 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3769 }
3770
3771 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
3772 : IsMask3 ? CI->getArgOperand(2)
3773 : CI->getArgOperand(0);
3774
3775 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3776 } else if (Name.starts_with("avx512.mask.pternlog.") ||
3777 Name.starts_with("avx512.maskz.pternlog.")) {
3778 bool ZeroMask = Name[11] == 'z';
3779 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3780 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3781 Intrinsic::ID IID;
3782 if (VecWidth == 128 && EltWidth == 32)
3783 IID = Intrinsic::x86_avx512_pternlog_d_128;
3784 else if (VecWidth == 256 && EltWidth == 32)
3785 IID = Intrinsic::x86_avx512_pternlog_d_256;
3786 else if (VecWidth == 512 && EltWidth == 32)
3787 IID = Intrinsic::x86_avx512_pternlog_d_512;
3788 else if (VecWidth == 128 && EltWidth == 64)
3789 IID = Intrinsic::x86_avx512_pternlog_q_128;
3790 else if (VecWidth == 256 && EltWidth == 64)
3791 IID = Intrinsic::x86_avx512_pternlog_q_256;
3792 else if (VecWidth == 512 && EltWidth == 64)
3793 IID = Intrinsic::x86_avx512_pternlog_q_512;
3794 else
3795 llvm_unreachable("Unexpected intrinsic");
3796
3797 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3798 CI->getArgOperand(2), CI->getArgOperand(3)};
3799 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3800 Args);
3801 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3802 : CI->getArgOperand(0);
3803 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3804 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
3805 Name.starts_with("avx512.maskz.vpmadd52")) {
3806 bool ZeroMask = Name[11] == 'z';
3807 bool High = Name[20] == 'h' || Name[21] == 'h';
3808 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3809 Intrinsic::ID IID;
3810 if (VecWidth == 128 && !High)
3811 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3812 else if (VecWidth == 256 && !High)
3813 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3814 else if (VecWidth == 512 && !High)
3815 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3816 else if (VecWidth == 128 && High)
3817 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3818 else if (VecWidth == 256 && High)
3819 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3820 else if (VecWidth == 512 && High)
3821 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3822 else
3823 llvm_unreachable("Unexpected intrinsic");
3824
3825 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3826 CI->getArgOperand(2)};
3827 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3828 Args);
3829 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3830 : CI->getArgOperand(0);
3831 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3832 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
3833 Name.starts_with("avx512.mask.vpermt2var.") ||
3834 Name.starts_with("avx512.maskz.vpermt2var.")) {
3835 bool ZeroMask = Name[11] == 'z';
3836 bool IndexForm = Name[17] == 'i';
3837 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3838 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
3839 Name.starts_with("avx512.maskz.vpdpbusd.") ||
3840 Name.starts_with("avx512.mask.vpdpbusds.") ||
3841 Name.starts_with("avx512.maskz.vpdpbusds.")) {
3842 bool ZeroMask = Name[11] == 'z';
3843 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3844 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3845 Intrinsic::ID IID;
3846 if (VecWidth == 128 && !IsSaturating)
3847 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3848 else if (VecWidth == 256 && !IsSaturating)
3849 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3850 else if (VecWidth == 512 && !IsSaturating)
3851 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3852 else if (VecWidth == 128 && IsSaturating)
3853 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3854 else if (VecWidth == 256 && IsSaturating)
3855 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3856 else if (VecWidth == 512 && IsSaturating)
3857 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3858 else
3859 llvm_unreachable("Unexpected intrinsic");
3860
3861 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3862 CI->getArgOperand(2)};
3863 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3864 Args);
3865 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3866 : CI->getArgOperand(0);
3867 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3868 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
3869 Name.starts_with("avx512.maskz.vpdpwssd.") ||
3870 Name.starts_with("avx512.mask.vpdpwssds.") ||
3871 Name.starts_with("avx512.maskz.vpdpwssds.")) {
3872 bool ZeroMask = Name[11] == 'z';
3873 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3874 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3875 Intrinsic::ID IID;
3876 if (VecWidth == 128 && !IsSaturating)
3877 IID = Intrinsic::x86_avx512_vpdpwssd_128;
3878 else if (VecWidth == 256 && !IsSaturating)
3879 IID = Intrinsic::x86_avx512_vpdpwssd_256;
3880 else if (VecWidth == 512 && !IsSaturating)
3881 IID = Intrinsic::x86_avx512_vpdpwssd_512;
3882 else if (VecWidth == 128 && IsSaturating)
3883 IID = Intrinsic::x86_avx512_vpdpwssds_128;
3884 else if (VecWidth == 256 && IsSaturating)
3885 IID = Intrinsic::x86_avx512_vpdpwssds_256;
3886 else if (VecWidth == 512 && IsSaturating)
3887 IID = Intrinsic::x86_avx512_vpdpwssds_512;
3888 else
3889 llvm_unreachable("Unexpected intrinsic");
3890
3891 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3892 CI->getArgOperand(2)};
3893 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3894 Args);
3895 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3896 : CI->getArgOperand(0);
3897 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3898 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3899 Name == "addcarry.u32" || Name == "addcarry.u64" ||
3900 Name == "subborrow.u32" || Name == "subborrow.u64") {
3901 Intrinsic::ID IID;
3902 if (Name[0] == 'a' && Name.back() == '2')
3903 IID = Intrinsic::x86_addcarry_32;
3904 else if (Name[0] == 'a' && Name.back() == '4')
3905 IID = Intrinsic::x86_addcarry_64;
3906 else if (Name[0] == 's' && Name.back() == '2')
3907 IID = Intrinsic::x86_subborrow_32;
3908 else if (Name[0] == 's' && Name.back() == '4')
3909 IID = Intrinsic::x86_subborrow_64;
3910 else
3911 llvm_unreachable("Unexpected intrinsic");
3912
3913 // Make a call with 3 operands.
3914 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
3915 CI->getArgOperand(2)};
3916 Value *NewCall = Builder.CreateCall(
3917 Intrinsic::getDeclaration(CI->getModule(), IID), Args);
3918
3919 // Extract the second result and store it.
3920 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3921 // Cast the pointer to the right type.
3922 Value *Ptr = Builder.CreateBitCast(
3923 CI->getArgOperand(3), llvm::PointerType::getUnqual(Data->getType()));
3924 Builder.CreateAlignedStore(Data, Ptr, Align(1));
3925 // Replace the original call result with the first result of the new call.
3926 Value *CF = Builder.CreateExtractValue(NewCall, 0);
3927
3928 CI->replaceAllUsesWith(CF);
3929 Rep = nullptr;
3930 } else if (Name.starts_with("avx512.mask.") &&
3931 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3932 // Rep will be updated by the call in the condition.
3933 }
3934
3935 return Rep;
3936}
3937
3939 IRBuilder<> &Builder) {
// Builds the replacement call for an old-style ARM MVE/CDE intrinsic call CI,
// selected by Name. Two groups of names are handled below; any other name
// reaches the llvm_unreachable at the end of the function.
//
// NOTE(review): this listing appears to have dropped a few lines (the start of
// the signature and the opening of some nested getDeclaration calls); confirm
// against the upstream file before changing any code here.
3940 if (Name == "mve.vctp64.old") {
3941 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
3942 // correct type.
// Step 1: call the current arm_mve_vctp64 declaration on the original first
// argument, reusing the original call's name.
3943 Value *VCTP = Builder.CreateCall(
3944 Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
3945 CI->getArgOperand(0), CI->getName());
// Step 2: feed that result through arm_mve_pred_v2i instantiated for a
// 2-element i1 vector.
3946 Value *C1 = Builder.CreateCall(
3948 F->getParent(), Intrinsic::arm_mve_pred_v2i,
3949 {VectorType::get(Builder.getInt1Ty(), 2, false)}),
3950 VCTP);
// Step 3: feed C1 through arm_mve_pred_i2v instantiated for a 4-element i1
// vector; this call is the value returned to the caller.
3951 return Builder.CreateCall(
3953 F->getParent(), Intrinsic::arm_mve_pred_i2v,
3954 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
3955 C1);
// Second group: predicated intrinsics whose mangled names end in ".v4i1".
// They are re-declared below with a 2-element i1 vector as the last overload
// type.
3956 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
3957 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
3958 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
3959 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
3960 Name ==
3961 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
3962 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
3963 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
3964 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
3965 Name ==
3966 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
3967 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
3968 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
3969 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
3970 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
3971 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
3972 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
3973 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
// Tys collects the overload types used to look up the new declaration; in
// every case below its last entry is V2I1Ty.
3974 std::vector<Type *> Tys;
3975 unsigned ID = CI->getIntrinsicID();
3976 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
3977 switch (ID) {
// Overloaded on: result type, type of operand 0, predicate.
3978 case Intrinsic::arm_mve_mull_int_predicated:
3979 case Intrinsic::arm_mve_vqdmull_predicated:
3980 case Intrinsic::arm_mve_vldr_gather_base_predicated:
3981 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
3982 break;
// Overloaded on: type of operand 0 (listed twice), predicate.
3983 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
3984 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
3985 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
3986 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
3987 V2I1Ty};
3988 break;
// Overloaded on: result type, types of operands 0 and 1, predicate.
3989 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
3990 Tys = {CI->getType(), CI->getOperand(0)->getType(),
3991 CI->getOperand(1)->getType(), V2I1Ty};
3992 break;
// Overloaded on: types of operands 0, 1 and 2, predicate.
3993 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
3994 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
3995 CI->getOperand(2)->getType(), V2I1Ty};
3996 break;
// CDE variants are overloaded on: type of operand 1, predicate.
3997 case Intrinsic::arm_cde_vcx1q_predicated:
3998 case Intrinsic::arm_cde_vcx1qa_predicated:
3999 case Intrinsic::arm_cde_vcx2q_predicated:
4000 case Intrinsic::arm_cde_vcx2qa_predicated:
4001 case Intrinsic::arm_cde_vcx3q_predicated:
4002 case Intrinsic::arm_cde_vcx3qa_predicated:
4003 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4004 break;
4005 default:
4006 llvm_unreachable("Unhandled Intrinsic!");
4007 }
4008
// Rebuild the argument list. Any argument whose scalar type is 1 bit wide
// (presumably just the predicate operand -- confirm) is passed through
// arm_mve_pred_v2i instantiated for a 4-element i1 vector and then through
// arm_mve_pred_i2v instantiated for V2I1Ty; all other arguments are copied
// unchanged.
4009 std::vector<Value *> Ops;
4010 for (Value *Op : CI->args()) {
4011 Type *Ty = Op->getType();
4012 if (Ty->getScalarSizeInBits() == 1) {
4013 Value *C1 = Builder.CreateCall(
4015 F->getParent(), Intrinsic::arm_mve_pred_v2i,
4016 {VectorType::get(Builder.getInt1Ty(), 4, false)}),
4017 Op);
4018 Op = Builder.CreateCall(
4019 Intrinsic::getDeclaration(F->getParent(),
4020 Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
4021 C1);
4022 }
4023 Ops.push_back(Op);
4024 }
4025
// Look up the same intrinsic ID with the new overload types and call it with
// the rebuilt arguments, keeping the original call's name.
4026 Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
4027 return Builder.CreateCall(Fn, Ops, CI->getName());
4028 }
// Callers are expected to pass only the names matched above.
4029 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4030}
4031
4032 // These are expected to have the arguments:
4033 // atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4034 //
4035 // Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4036 //
4038 Function *F, IRBuilder<> &Builder) {
// Rewrites a call to one of the legacy atomic intrinsics matched by the name
// prefixes below into an atomicrmw instruction. Returns nullptr when the call
// does not have the expected shape; otherwise returns the atomicrmw result
// bitcast to the original call's type.
//
// NOTE(review): this listing appears to have dropped the first line of the
// signature and the head of the StringSwitch expression below; confirm
// against the upstream file before changing any code here.

// Pick the atomicrmw operation from the intrinsic name prefix.
4039 AtomicRMWInst::BinOp RMWOp =
4041 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4042 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4043 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4044 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4045 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap);
4046
// The operand count includes the callee (see the "plus the callee" remark
// further down), so fewer than 3 operands means fewer than the two mandatory
// arguments (ptr, rmw_value).
4047 unsigned NumOperands = CI->getNumOperands();
4048 if (NumOperands < 3) // Malformed bitcode.
4049 return nullptr;
4050
4051 Value *Ptr = CI->getArgOperand(0);
4052 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4053 if (!PtrTy) // Malformed.
4054 return nullptr;
4055
// The value operand must have the same type as the call's result.
4056 Value *Val = CI->getArgOperand(1);
4057 if (Val->getType() != CI->getType()) // Malformed.
4058 return nullptr;
4059
4060 ConstantInt *OrderArg = nullptr;
4061 bool IsVolatile = false;
4062
4063 // These should have 5 arguments (plus the callee). A separate version of the
4064 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
// A non-constant ordering argument leaves OrderArg null, which selects the
// sequentially-consistent default below.
4065 if (NumOperands > 3)
4066 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4067
4068 // Ignore scope argument at 3
4069
// The volatile flag is argument 4; a non-constant flag, or any non-zero
// constant, is treated as volatile.
4070 if (NumOperands > 5) {
4071 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4072 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4073 }
4074
// Start from seq_cst, adopt the requested ordering only if it is a valid
// encoding, and map NotAtomic / Unordered back to seq_cst.
4075 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4076 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4077 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4078 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4079 Order = AtomicOrdering::SequentiallyConsistent;
4080
4081 LLVMContext &Ctx = F->getContext();
4082
4083 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
// Only the value operand is cast here; the result is cast back to RetTy at
// the return statement.
4084 Type *RetTy = CI->getType();
4085 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4086 if (VT->getElementType()->isIntegerTy(16)) {
4087 VectorType *AsBF16 =
4088 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4089 Val = Builder.CreateBitCast(Val, AsBF16);
4090 }
4091 }
4092
4093 // The scope argument never really worked correctly. Use agent as the most
4094 // conservative option which should still always produce the instruction.
// NOTE(review): std::nullopt is presumably the alignment argument, leaving the
// choice to the builder -- confirm against IRBuilder::CreateAtomicRMW.
4095 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4096 AtomicRMWInst *RMW =
4097 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4098
// Every address space other than 3 gets an empty
// "amdgpu.no.fine.grained.memory" metadata node attached.
// NOTE(review): address space 3 is presumably the AMDGPU local/LDS space --
// confirm.
4099 if (PtrTy->getAddressSpace() != 3) {
4100 RMW->setMetadata("amdgpu.no.fine.grained.memory",
4101 MDNode::get(F->getContext(), {}));
4102 }
4103
4104 if (IsVolatile)
4105 RMW->setVolatile(true);
4106
// Cast the atomicrmw result back to the type the original call produced.
4107 return Builder.CreateBitCast(RMW, RetTy);
4108}
4109
4110/// Helper to unwrap intrinsic call MetadataAsValue operands.
4111template <typename MDType>
4112static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
4113 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4114 return dyn_cast<MDType>(MAV->getMetadata());
4115 return nullptr;
4116}
4117
4118/// Convert debug intrinsic calls to non-instruction debug records.
4119/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4120/// \p CI - The debug intrinsic call.
4122 DbgRecord *DR = nullptr;
4123 if (Name == "label") {
4124 DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
4125 } else if (Name == "assign") {
4126 DR = new DbgVariableRecord(
4127 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4128 unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
4129 unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
4130 CI->getDebugLoc());
4131 } else if (Name == "declare") {
4132 DR = new DbgVariableRecord(
4133 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
4134 unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
4135 DbgVariableRecord::LocationType::Declare);
4136 } else if (Name == "addr") {
4137 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4138 DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
4139 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
4140 DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
4141 unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
4142 CI->getDebugLoc());
4143 } else if (Name == "value") {
4144 // An old version of dbg.value had an extra offset argument.
4145 unsigned VarOp = 1;
4146 unsigned ExprOp = 2;
4147 if (CI->arg_size() == 4) {
4148 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
4149 // Nonzero offset dbg.values get dropped without a replacement.
4150 if (!Offset || !Offset->isZeroValue())
4151 return;
4152 VarOp = 2;
4153 ExprOp = 3;
4154 }
4155 DR = new DbgVariableRecord(
4156 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
4157 unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
4158 }
4159 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4160 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());