1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/Statistic.h"
27#include "llvm/Analysis/Loads.h"
32#include "llvm/IR/Attributes.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/DebugInfo.h"
39#include "llvm/IR/Function.h"
41#include "llvm/IR/InlineAsm.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsHexagon.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/Metadata.h"
54#include "llvm/IR/Statepoint.h"
55#include "llvm/IR/Type.h"
56#include "llvm/IR/User.h"
57#include "llvm/IR/Value.h"
58#include "llvm/IR/ValueHandle.h"
63#include "llvm/Support/Debug.h"
74#include <algorithm>
75#include <cassert>
76#include <cstdint>
77#include <optional>
78#include <utility>
79#include <vector>
80
81#define DEBUG_TYPE "instcombine"
82#include "llvm/Transforms/Utils/InstructionWorklist.h"
83
84using namespace llvm;
85using namespace PatternMatch;
86
87STATISTIC(NumSimplified, "Number of library calls simplified");
88
89static cl::opt<unsigned> GuardWideningWindow(
90 "instcombine-guard-widening-window",
91 cl::init(3),
92 cl::desc("How wide an instruction window to bypass looking for "
93 "another guard"));
94
95/// Return the specified type promoted as it would be to pass through a va_arg
96/// area.
97static Type *getPromotedType(Type *Ty) {
98 if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
99 if (ITy->getBitWidth() < 32)
100 return Type::getInt32Ty(Ty->getContext());
101 }
102 return Ty;
103}
104
105/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
106/// TODO: This should probably be integrated with visitAllocSites, but that
107/// requires a deeper change to allow either unread or unwritten objects.
108static bool hasUndefSource(AnyMemTransferInst *MI) {
109 auto *Src = MI->getRawSource();
110 while (isa<GetElementPtrInst>(Src)) {
111 if (!Src->hasOneUse())
112 return false;
113 Src = cast<Instruction>(Src)->getOperand(0);
114 }
115 return isa<AllocaInst>(Src) && Src->hasOneUse();
116}
117
118Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
119 Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
120 MaybeAlign CopyDstAlign = MI->getDestAlign();
121 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
122 MI->setDestAlignment(DstAlign);
123 return MI;
124 }
125
126 Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
127 MaybeAlign CopySrcAlign = MI->getSourceAlign();
128 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
129 MI->setSourceAlignment(SrcAlign);
130 return MI;
131 }
132
133 // If we have a store to a location which is known constant, we can conclude
134 // that the store must be storing the constant value (else the memory
135 // wouldn't be constant), and this must be a noop.
136 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
137 // Set the size of the copy to 0, it will be deleted on the next iteration.
138 MI->setLength((uint64_t)0);
139 return MI;
140 }
141
142 // If the source is provably undef, the memcpy/memmove doesn't do anything
143 // (unless the transfer is volatile).
144 if (hasUndefSource(MI) && !MI->isVolatile()) {
145 // Set the size of the copy to 0, it will be deleted on the next iteration.
146 MI->setLength((uint64_t)0);
147 return MI;
148 }
149
150 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
151 // load/store.
152 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
153 if (!MemOpLength) return nullptr;
154
155 // Source and destination pointer types are always "i8*" for intrinsic. See
156 // if the size is something we can handle with a single primitive load/store.
157 // A single load+store correctly handles overlapping memory in the memmove
158 // case.
159 uint64_t Size = MemOpLength->getLimitedValue();
160 assert(Size && "0-sized memory transferring should be removed already.");
161
162 if (Size > 8 || (Size&(Size-1)))
163 return nullptr; // If not 1/2/4/8 bytes, exit.
164
165 // If it is an atomic and the alignment is less than the size, then we will
166 // introduce an unaligned memory access, which will later be turned into a
167 // libcall in CodeGen. There is no evident performance gain, so disable the
168 // transform for now.
169 if (MI->isAtomic())
170 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
171 return nullptr;
172
173 // Use an integer load+store unless we can find something better.
174 IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
175
176 // If the memcpy has metadata describing the members, see if we can get the
177 // TBAA, scope and noalias tags describing our copy.
178 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
179
180 Value *Src = MI->getArgOperand(1);
181 Value *Dest = MI->getArgOperand(0);
182 LoadInst *L = Builder.CreateLoad(IntType, Src);
183 // Alignment from the mem intrinsic will be better, so use it.
184 L->setAlignment(*CopySrcAlign);
185 L->setAAMetadata(AACopyMD);
186 MDNode *LoopMemParallelMD =
187 MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
188 if (LoopMemParallelMD)
189 L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
190 MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
191 if (AccessGroupMD)
192 L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
193
194 StoreInst *S = Builder.CreateStore(L, Dest);
195 // Alignment from the mem intrinsic will be better, so use it.
196 S->setAlignment(*CopyDstAlign);
197 S->setAAMetadata(AACopyMD);
198 if (LoopMemParallelMD)
199 S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
200 if (AccessGroupMD)
201 S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
202 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
203
204 if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
205 // non-atomics can be volatile
206 L->setVolatile(MT->isVolatile());
207 S->setVolatile(MT->isVolatile());
208 }
209 if (MI->isAtomic()) {
210 // atomics have to be unordered
211 L->setOrdering(AtomicOrdering::Unordered);
212 S->setOrdering(AtomicOrdering::Unordered);
213 }
214
215 // Set the size of the copy to 0, it will be deleted on the next iteration.
216 MI->setLength((uint64_t)0);
217 return MI;
218}
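// Illustrative example (editorial sketch, not part of the original source):
// with a constant power-of-two length, the rewrite above turns
//   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dst, ptr align 4 %src,
//                                    i64 4, i1 false)
// into roughly
//   %v = load i32, ptr %src, align 4
//   store i32 %v, ptr %dst, align 4
// and the now zero-length memcpy is erased on the next InstCombine iteration.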
219
220Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
221 const Align KnownAlignment =
222 getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
223 MaybeAlign MemSetAlign = MI->getDestAlign();
224 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
225 MI->setDestAlignment(KnownAlignment);
226 return MI;
227 }
228
229 // If we have a store to a location which is known constant, we can conclude
230 // that the store must be storing the constant value (else the memory
231 // wouldn't be constant), and this must be a noop.
232 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
233 // Set the size of the copy to 0, it will be deleted on the next iteration.
234 MI->setLength((uint64_t)0);
235 return MI;
236 }
237
238 // Remove memset with an undef value.
239 // FIXME: This is technically incorrect because it might overwrite a poison
240 // value. Change to PoisonValue once #52930 is resolved.
241 if (isa<UndefValue>(MI->getValue())) {
242 // Set the size of the copy to 0, it will be deleted on the next iteration.
243 MI->setLength((uint64_t)0);
244 return MI;
245 }
246
247 // Extract the length and alignment and fill if they are constant.
248 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
249 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
250 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
251 return nullptr;
252 const uint64_t Len = LenC->getLimitedValue();
253 assert(Len && "0-sized memory setting should be removed already.");
254 const Align Alignment = MI->getDestAlign().valueOrOne();
255
256 // If it is an atomic and the alignment is less than the size, then we will
257 // introduce an unaligned memory access, which will later be turned into a
258 // libcall in CodeGen. There is no evident performance gain, so disable the
259 // transform for now.
260 if (MI->isAtomic() && Alignment < Len)
261 return nullptr;
262
263 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
264 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
265 Value *Dest = MI->getDest();
266
267 // Extract the fill value and store.
268 Constant *FillVal = ConstantInt::get(
269 MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
270 StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
271 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
272 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(S)) {
273 if (llvm::is_contained(DbgAssign->location_ops(), FillC))
274 DbgAssign->replaceVariableLocationOp(FillC, FillVal);
275 }
276
277 S->setAlignment(Alignment);
278 if (MI->isAtomic())
279 S->setOrdering(AtomicOrdering::Unordered);
280
281 // Set the size of the copy to 0, it will be deleted on the next iteration.
282 MI->setLength((uint64_t)0);
283 return MI;
284 }
285
286 return nullptr;
287}
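// Illustrative example (editorial sketch, not part of the original source):
// for a small constant length such as
//   call void @llvm.memset.p0.i64(ptr align 4 %p, i8 -1, i64 4, i1 false)
// the splat of the fill byte built above yields approximately
//   store i32 -1, ptr %p, align 4
// since APInt::getSplat(32, 0xFF) is 0xFFFFFFFF.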
288
289// TODO, Obvious Missing Transforms:
290// * Narrow width by halves excluding zero/undef lanes
291Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
292 Value *LoadPtr = II.getArgOperand(0);
293 const Align Alignment = II.getParamAlign(0).valueOrOne();
294
295 // If the mask is all ones or undefs, this is a plain vector load of the 1st
296 // argument.
297 if (maskIsAllOneOrUndef(II.getArgOperand(1))) {
298 LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
299 "unmaskedload");
300 L->copyMetadata(II);
301 return L;
302 }
303
304 // If we can unconditionally load from this address, replace with a
305 // load/select idiom. TODO: use DT for context sensitive query
306 if (isDereferenceablePointer(LoadPtr, II.getType(),
307 II.getDataLayout(), &II, &AC)) {
308 LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
309 "unmaskedload");
310 LI->copyMetadata(II);
311 return Builder.CreateSelect(II.getArgOperand(1), LI, II.getArgOperand(2));
312 }
313
314 return nullptr;
315}
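// Illustrative example (editorial sketch, not part of the original source;
// operand positions follow the argument indices used above): with an all-ones
// mask, a masked load of <4 x i32> from %p simplifies to
//   %v = load <4 x i32>, ptr %p, align 4
// and with a variable mask on a pointer known to be dereferenceable it becomes
// an unconditional load feeding
//   select <4 x i1> %mask, <4 x i32> %v, <4 x i32> %passthru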
316
317// TODO, Obvious Missing Transforms:
318// * Single constant active lane -> store
319// * Narrow width by halves excluding zero/undef lanes
320Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
321 Value *StorePtr = II.getArgOperand(1);
322 Align Alignment = II.getParamAlign(1).valueOrOne();
323 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
324 if (!ConstMask)
325 return nullptr;
326
327 // If the mask is all zeros, this instruction does nothing.
328 if (maskIsAllZeroOrUndef(ConstMask))
329 return eraseInstFromFunction(II);
330
331 // If the mask is all ones, this is a plain vector store of the 1st argument.
332 if (maskIsAllOneOrUndef(ConstMask)) {
333 StoreInst *S =
334 new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
335 S->copyMetadata(II);
336 return S;
337 }
338
339 if (isa<ScalableVectorType>(ConstMask->getType()))
340 return nullptr;
341
342 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
343 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
344 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
345 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
346 PoisonElts))
347 return replaceOperand(II, 0, V);
348
349 return nullptr;
350}
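// Illustrative example (editorial sketch, not part of the original source):
// for a masked store of <4 x i32> %v to %p with a constant mask,
//   all-zero mask -> the intrinsic is erased,
//   all-ones mask -> store <4 x i32> %v, ptr %p, align 4,
//   partial mask  -> the masked-off lanes are handed to
//                    SimplifyDemandedVectorElts so %v itself may simplify.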
351
352// TODO, Obvious Missing Transforms:
353// * Single constant active lane load -> load
354// * Dereferenceable address & few lanes -> scalarize speculative load/selects
355// * Adjacent vector addresses -> masked.load
356// * Narrow width by halves excluding zero/undef lanes
357// * Vector incrementing address -> vector masked load
358Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
359 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(1));
360 if (!ConstMask)
361 return nullptr;
362
363 // Vector splat address w/known mask -> scalar load
364 // Fold the gather to load the source vector first lane
365 // because it is reloading the same value each time
366 if (ConstMask->isAllOnesValue())
367 if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
368 auto *VecTy = cast<VectorType>(II.getType());
369 const Align Alignment = II.getParamAlign(0).valueOrOne();
370 LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
371 Alignment, "load.scalar");
372 Value *Shuf =
373 Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
374 return replaceInstUsesWith(II, cast<Instruction>(Shuf));
375 }
376
377 return nullptr;
378}
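// Illustrative example (editorial sketch, not part of the original source):
// a gather whose pointer operand is a splat and whose mask is all ones reads
// the same address in every lane, so it becomes
//   %s = load i32, ptr %p, align 4
//   %g = <vector splat of %s>
// i.e. one scalar load plus a vector broadcast, as built above.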
379
380// TODO, Obvious Missing Transforms:
381// * Single constant active lane -> store
382// * Adjacent vector addresses -> masked.store
383// * Narrow store width by halves excluding zero/undef lanes
384// * Vector incrementing address -> vector masked store
385Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
386 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
387 if (!ConstMask)
388 return nullptr;
389
390 // If the mask is all zeros, a scatter does nothing.
391 if (maskIsAllZeroOrUndef(ConstMask))
392 return eraseInstFromFunction(II);
393
394 // Vector splat address -> scalar store
395 if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
396 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
397 if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
398 if (maskContainsAllOneOrUndef(ConstMask)) {
399 Align Alignment = II.getParamAlign(1).valueOrOne();
400 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
401 Alignment);
402 S->copyMetadata(II);
403 return S;
404 }
405 }
406 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
407 // lastlane), ptr
408 if (ConstMask->isAllOnesValue()) {
409 Align Alignment = II.getParamAlign(1).valueOrOne();
410 VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
411 ElementCount VF = WideLoadTy->getElementCount();
412 Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
413 Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
414 Value *Extract =
415 Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
416 StoreInst *S =
417 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
418 S->copyMetadata(II);
419 return S;
420 }
421 }
422 if (isa<ScalableVectorType>(ConstMask->getType()))
423 return nullptr;
424
425 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
426 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
427 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
428 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
429 PoisonElts))
430 return replaceOperand(II, 0, V);
431 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
432 PoisonElts))
433 return replaceOperand(II, 1, V);
434
435 return nullptr;
436}
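// Illustrative example (editorial sketch, not part of the original source):
// when both the stored value and the pointer operand of a scatter are splats
// and the mask has at least one lane known set, every active lane writes the
// same value to the same address, so the whole scatter collapses to
//   store i32 %val, ptr %p
// With an all-ones mask and a non-splat value, only the last lane's element
// can survive, hence the extract of the last lane above.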
437
438/// This function transforms launder.invariant.group and strip.invariant.group
439/// like:
440/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
441/// launder(strip(%x)) -> launder(%x)
442/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
443/// strip(launder(%x)) -> strip(%x)
444/// This is legal because it preserves the most recent information about
445/// the presence or absence of invariant.group.
446static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
447 InstCombinerImpl &IC) {
448 auto *Arg = II.getArgOperand(0);
449 auto *StrippedArg = Arg->stripPointerCasts();
450 auto *StrippedInvariantGroupsArg = StrippedArg;
451 while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
452 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
453 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
454 break;
455 StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
456 }
457 if (StrippedArg == StrippedInvariantGroupsArg)
458 return nullptr; // No launders/strips to remove.
459
460 Value *Result = nullptr;
461
462 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
463 Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
464 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
465 Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
466 else
467 llvm_unreachable(
468 "simplifyInvariantGroupIntrinsic only handles launder and strip");
469 if (Result->getType()->getPointerAddressSpace() !=
470 II.getType()->getPointerAddressSpace())
471 Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
472
473 return cast<Instruction>(Result);
474}
475
476static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
477 assert((II.getIntrinsicID() == Intrinsic::cttz ||
478 II.getIntrinsicID() == Intrinsic::ctlz) &&
479 "Expected cttz or ctlz intrinsic");
480 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
481 Value *Op0 = II.getArgOperand(0);
482 Value *Op1 = II.getArgOperand(1);
483 Value *X;
484 // ctlz(bitreverse(x)) -> cttz(x)
485 // cttz(bitreverse(x)) -> ctlz(x)
486 if (match(Op0, m_BitReverse(m_Value(X)))) {
487 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
488 Function *F =
489 Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
490 return CallInst::Create(F, {X, II.getArgOperand(1)});
491 }
492
493 if (II.getType()->isIntOrIntVectorTy(1)) {
494 // ctlz/cttz i1 Op0 --> not Op0
495 if (match(Op1, m_Zero()))
496 return BinaryOperator::CreateNot(Op0);
497 // If zero is poison, then the input can be assumed to be "true", so the
498 // instruction simplifies to "false".
499 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
500 return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
501 }
502
503 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
504 if (II.hasOneUse() && match(Op1, m_Zero()) &&
505 match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
506 II.dropUBImplyingAttrsAndMetadata();
507 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
508 }
509
510 Constant *C;
511
512 if (IsTZ) {
513 // cttz(-x) -> cttz(x)
514 if (match(Op0, m_Neg(m_Value(X))))
515 return IC.replaceOperand(II, 0, X);
516
517 // cttz(-x & x) -> cttz(x)
518 if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
519 return IC.replaceOperand(II, 0, X);
520
521 // cttz(sext(x)) -> cttz(zext(x))
522 if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
523 auto *Zext = IC.Builder.CreateZExt(X, II.getType());
524 auto *CttzZext =
525 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
526 return IC.replaceInstUsesWith(II, CttzZext);
527 }
528
529 // Zext doesn't change the number of trailing zeros, so narrow:
530 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
531 if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
532 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
533 IC.Builder.getTrue());
534 auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
535 return IC.replaceInstUsesWith(II, ZextCttz);
536 }
537
538 // cttz(abs(x)) -> cttz(x)
539 // cttz(nabs(x)) -> cttz(x)
540 Value *Y;
541 SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
542 if (SPF == SPF_ABS || SPF == SPF_NABS)
543 return IC.replaceOperand(II, 0, X);
544
545 if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
546 return IC.replaceOperand(II, 0, X);
547
548 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
549 if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
550 match(Op1, m_One())) {
551 Value *ConstCttz =
552 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
553 return BinaryOperator::CreateAdd(ConstCttz, X);
554 }
555
556 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
557 if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
558 match(Op1, m_One())) {
559 Value *ConstCttz =
560 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
561 return BinaryOperator::CreateSub(ConstCttz, X);
562 }
563
564 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
565 if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
566 Value *Width =
567 ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
568 return BinaryOperator::CreateSub(Width, X);
569 }
570 } else {
571 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
572 if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
573 match(Op1, m_One())) {
574 Value *ConstCtlz =
575 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
576 return BinaryOperator::CreateAdd(ConstCtlz, X);
577 }
578
579 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
580 if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
581 match(Op1, m_One())) {
582 Value *ConstCtlz =
583 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
584 return BinaryOperator::CreateSub(ConstCtlz, X);
585 }
586
587 // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
588 if (Op0->hasOneUse() &&
589 match(Op0,
590 m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
591 Type *Ty = II.getType();
592 unsigned BitWidth = Ty->getScalarSizeInBits();
593 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
594 {X, IC.Builder.getFalse()});
595 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
596 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
597 }
598 }
599
600 // cttz(Pow2) -> Log2(Pow2)
601 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
602 if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
603 if (IsTZ)
604 return IC.replaceInstUsesWith(II, R);
605 BinaryOperator *BO = BinaryOperator::CreateSub(
606 ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
607 R);
608 BO->setHasNoSignedWrap();
609 BO->setHasNoUnsignedWrap();
610 return BO;
611 }
612
613 KnownBits Known = IC.computeKnownBits(Op0, &II);
614
615 // Create a mask for bits above (ctlz) or below (cttz) the first known one.
616 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
617 : Known.countMaxLeadingZeros();
618 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
619 : Known.countMinLeadingZeros();
620
621 // If all bits above (ctlz) or below (cttz) the first known one are known
622 // zero, this value is constant.
623 // FIXME: This should be in InstSimplify because we're replacing an
624 // instruction with a constant.
625 if (PossibleZeros == DefiniteZeros) {
626 auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
627 return IC.replaceInstUsesWith(II, C);
628 }
629
630 // If the input to cttz/ctlz is known to be non-zero,
631 // then change the 'ZeroIsPoison' parameter to 'true'
632 // because we know the zero behavior can't affect the result.
633 if (!Known.One.isZero() ||
634 isKnownNonZero(Op0, IC.getSimplifyQuery().getWithInstruction(&II))) {
635 if (!match(II.getArgOperand(1), m_One()))
636 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
637 }
638
639 // Add range attribute since known bits can't completely reflect what we know.
640 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
641 if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
642 !II.getMetadata(LLVMContext::MD_range)) {
643 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
644 APInt(BitWidth, PossibleZeros + 1));
645 II.addRangeRetAttr(Range);
646 return &II;
647 }
648
649 return nullptr;
650}
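// Illustrative examples (editorial sketch, not part of the original source):
//   %neg = sub i32 0, %x
//   %r   = call i32 @llvm.cttz.i32(i32 %neg, i1 false)
// becomes  call i32 @llvm.cttz.i32(i32 %x, i1 false), since negation preserves
// trailing zeros; and a known power of two folds to its log2, e.g.
//   call i32 @llvm.cttz.i32(i32 8, i1 false)  -->  i32 3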
651
652static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
653 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
654 "Expected ctpop intrinsic");
655 Type *Ty = II.getType();
656 unsigned BitWidth = Ty->getScalarSizeInBits();
657 Value *Op0 = II.getArgOperand(0);
658 Value *X, *Y;
659
660 // ctpop(bitreverse(x)) -> ctpop(x)
661 // ctpop(bswap(x)) -> ctpop(x)
662 if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
663 return IC.replaceOperand(II, 0, X);
664
665 // ctpop(rot(x)) -> ctpop(x)
666 if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
667 match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
668 X == Y)
669 return IC.replaceOperand(II, 0, X);
670
671 // ctpop(x | -x) -> bitwidth - cttz(x, false)
672 if (Op0->hasOneUse() &&
673 match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
674 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
675 {X, IC.Builder.getFalse()});
676 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
677 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
678 }
679
680 // ctpop(~x & (x - 1)) -> cttz(x, false)
681 if (match(Op0,
682 m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
683 Function *F =
684 Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
685 return CallInst::Create(F, {X, IC.Builder.getFalse()});
686 }
687
688 // Zext doesn't change the number of set bits, so narrow:
689 // ctpop (zext X) --> zext (ctpop X)
690 if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
691 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
692 return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
693 }
694
695 KnownBits Known(BitWidth);
696 IC.computeKnownBits(Op0, Known, &II);
697
698 // If all bits are zero except for exactly one fixed bit, then the result
699 // must be 0 or 1, and we can get that answer by shifting to LSB:
700 // ctpop (X & 32) --> (X & 32) >> 5
701 // TODO: Investigate removing this, as it's likely unnecessary given the below
702 // `isKnownToBeAPowerOfTwo` check.
703 if ((~Known.Zero).isPowerOf2())
704 return BinaryOperator::CreateLShr(
705 Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
706
707 // More generally we can also handle non-constant power of 2 patterns such as
708 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
709 // ctpop(Pow2OrZero) --> icmp ne X, 0
710 if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
711 return CastInst::Create(Instruction::ZExt,
712 IC.Builder.CreateICmpNE(Op0,
713 Constant::getNullValue(Ty)),
714 Ty);
715
716 // Add range attribute since known bits can't completely reflect what we know.
717 if (BitWidth != 1) {
718 ConstantRange OldRange =
719 II.getRange().value_or(ConstantRange::getFull(BitWidth));
720
721 unsigned Lower = Known.countMinPopulation();
722 unsigned Upper = Known.countMaxPopulation() + 1;
723
724 if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
725 isKnownNonZero(Op0, IC.getSimplifyQuery().getWithInstruction(&II)))
726 Lower = 1;
727
728 ConstantRange Range(APInt(BitWidth, Lower), APInt(BitWidth, Upper));
729 Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
730
731 if (Range != OldRange) {
732 II.addRangeRetAttr(Range);
733 return &II;
734 }
735 }
736
737 return nullptr;
738}
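// Illustrative example (editorial sketch, not part of the original source):
// when at most one fixed bit can be set, the popcount is that bit shifted to
// the LSB:
//   %m = and i32 %x, 32
//   %c = call i32 @llvm.ctpop.i32(i32 %m)
// becomes
//   %c = lshr i32 %m, 5
// and for an operand known to be zero or a power of two, ctpop becomes
//   zext(icmp ne i32 %op, 0)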
739
740/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
741/// at most two source operands are actually referenced.
743 bool IsExtension) {
744 // Bail out if the mask is not a constant.
745 auto *C = dyn_cast<Constant>(II.getArgOperand(II.arg_size() - 1));
746 if (!C)
747 return nullptr;
748
749 auto *RetTy = cast<FixedVectorType>(II.getType());
750 unsigned NumIndexes = RetTy->getNumElements();
751
752 // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
753 if (!RetTy->getElementType()->isIntegerTy(8) ||
754 (NumIndexes != 8 && NumIndexes != 16))
755 return nullptr;
756
757 // For tbx instructions, the first argument is the "fallback" vector, which
758 // has the same length as the mask and return type.
759 unsigned int StartIndex = (unsigned)IsExtension;
760 auto *SourceTy =
761 cast<FixedVectorType>(II.getArgOperand(StartIndex)->getType());
762 // Note that the element count of each source vector does *not* need to be the
763 // same as the element count of the return type and mask! All source vectors
764 // must have the same element count as each other, though.
765 unsigned NumElementsPerSource = SourceTy->getNumElements();
766
767 // There are no tbl/tbx intrinsics for which the destination size exceeds the
768 // source size. However, our definitions of the intrinsics, at least in
769 // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
770 // *could* technically happen.
771 if (NumIndexes > NumElementsPerSource)
772 return nullptr;
773
774 // The tbl/tbx intrinsics take several source operands followed by a mask
775 // operand.
776 unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;
777
778 // Map input operands to shuffle indices. This also helpfully deduplicates the
779 // input arguments, in case the same value is passed as an argument multiple
780 // times.
781 SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
782 Value *ShuffleOperands[2] = {PoisonValue::get(SourceTy),
783 PoisonValue::get(SourceTy)};
784
785 int Indexes[16];
786 for (unsigned I = 0; I < NumIndexes; ++I) {
787 Constant *COp = C->getAggregateElement(I);
788
789 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
790 return nullptr;
791
792 if (isa<UndefValue>(COp)) {
793 Indexes[I] = -1;
794 continue;
795 }
796
797 uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
798 // The index of the input argument that this index references (0 = first
799 // source argument, etc).
800 unsigned SourceOperandIndex = Index / NumElementsPerSource;
801 // The index of the element at that source operand.
802 unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
803
804 Value *SourceOperand;
805 if (SourceOperandIndex >= NumSourceOperands) {
806 // This index is out of bounds. Map it to index into either the fallback
807 // vector (tbx) or vector of zeroes (tbl).
808 SourceOperandIndex = NumSourceOperands;
809 if (IsExtension) {
810 // For out-of-bounds indices in tbx, choose the `I`th element of the
811 // fallback.
812 SourceOperand = II.getArgOperand(0);
813 SourceOperandElementIndex = I;
814 } else {
815 // Otherwise, choose some element from the dummy vector of zeroes (we'll
816 // always choose the first).
817 SourceOperand = Constant::getNullValue(SourceTy);
818 SourceOperandElementIndex = 0;
819 }
820 } else {
821 SourceOperand = II.getArgOperand(SourceOperandIndex + StartIndex);
822 }
823
824 // The source operand may be the fallback vector, which may not have the
825 // same number of elements as the source vector. In that case, we *could*
826 // choose to extend its length with another shufflevector, but it's simpler
827 // to just bail instead.
828 if (cast<FixedVectorType>(SourceOperand->getType())->getNumElements() !=
829 NumElementsPerSource)
830 return nullptr;
831
832 // We now know the source operand referenced by this index. Make it a
833 // shufflevector operand, if it isn't already.
834 unsigned NumSlots = ValueToShuffleSlot.size();
835 // This shuffle references more than two sources, and hence cannot be
836 // represented as a shufflevector.
837 if (NumSlots == 2 && !ValueToShuffleSlot.contains(SourceOperand))
838 return nullptr;
839
840 auto [It, Inserted] =
841 ValueToShuffleSlot.try_emplace(SourceOperand, NumSlots);
842 if (Inserted)
843 ShuffleOperands[It->getSecond()] = SourceOperand;
844
845 unsigned RemappedIndex =
846 (It->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
847 Indexes[I] = RemappedIndex;
848 }
849
850 Value *Shuf = IC.Builder.CreateShuffleVector(
851 ShuffleOperands[0], ShuffleOperands[1], ArrayRef(Indexes, NumIndexes));
852 return IC.replaceInstUsesWith(II, Shuf);
853}
854
855// Returns true iff the 2 intrinsics have the same operands, limiting the
856// comparison to the first NumOperands.
857static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
858 unsigned NumOperands) {
859 assert(I.arg_size() >= NumOperands && "Not enough operands");
860 assert(E.arg_size() >= NumOperands && "Not enough operands");
861 for (unsigned i = 0; i < NumOperands; i++)
862 if (I.getArgOperand(i) != E.getArgOperand(i))
863 return false;
864 return true;
865}
866
867// Remove trivially empty start/end intrinsic ranges, i.e. a start
868// immediately followed by an end (ignoring debuginfo or other
869// start/end intrinsics in between). As this handles only the most trivial
870// cases, tracking the nesting level is not needed:
871//
872// call @llvm.foo.start(i1 0)
873// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
874// call @llvm.foo.end(i1 0)
875// call @llvm.foo.end(i1 0) ; &I
876static bool
877removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
878 std::function<bool(const IntrinsicInst &)> IsStart) {
879 // We start from the end intrinsic and scan backwards, so that InstCombine
880 // has already processed (and potentially removed) all the instructions
881 // before the end intrinsic.
882 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
883 for (; BI != BE; ++BI) {
884 if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
885 if (I->isDebugOrPseudoInst() ||
886 I->getIntrinsicID() == EndI.getIntrinsicID())
887 continue;
888 if (IsStart(*I)) {
889 if (haveSameOperands(EndI, *I, EndI.arg_size())) {
890 IC.eraseInstFromFunction(*I);
891 IC.eraseInstFromFunction(EndI);
892 return true;
893 }
894 // Skip start intrinsics that don't pair with this end intrinsic.
895 continue;
896 }
897 }
898 break;
899 }
900
901 return false;
902}
903
904Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
905 removeTriviallyEmptyRange(I, *this, [&I](const IntrinsicInst &II) {
906 // Bail out on the case where the source va_list of a va_copy is destroyed
907 // immediately by a follow-up va_end.
908 return II.getIntrinsicID() == Intrinsic::vastart ||
909 (II.getIntrinsicID() == Intrinsic::vacopy &&
910 I.getArgOperand(0) != II.getArgOperand(1));
911 });
912 return nullptr;
913}
914
915static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
916 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
917 Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
918 if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
919 Call.setArgOperand(0, Arg1);
920 Call.setArgOperand(1, Arg0);
921 return &Call;
922 }
923 return nullptr;
924}
925
926/// Creates a result tuple for an overflow intrinsic \p II with a given
927/// \p Result and a constant \p Overflow value.
928static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
929 Constant *Overflow) {
930 Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
931 StructType *ST = cast<StructType>(II->getType());
932 Constant *Struct = ConstantStruct::get(ST, V);
933 return InsertValueInst::Create(Struct, Result, 0);
934}
935
936Instruction *
937InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
938 WithOverflowInst *WO = cast<WithOverflowInst>(II);
939 Value *OperationResult = nullptr;
940 Constant *OverflowResult = nullptr;
941 if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
942 WO->getRHS(), *WO, OperationResult, OverflowResult))
943 return createOverflowTuple(WO, OperationResult, OverflowResult);
944
945 // See whether we can optimize the overflow check with assumption information.
946 for (User *U : WO->users()) {
947 if (!match(U, m_ExtractValue<1>(m_Value())))
948 continue;
949
950 for (auto &AssumeVH : AC.assumptionsFor(U)) {
951 if (!AssumeVH)
952 continue;
953 CallInst *I = cast<CallInst>(AssumeVH);
954 if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
955 continue;
956 if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
957 /*AllowEphemerals=*/true))
958 continue;
959 Value *Result =
960 Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
961 Result->takeName(WO);
962 if (auto *Inst = dyn_cast<Instruction>(Result)) {
963 if (WO->isSigned())
964 Inst->setHasNoSignedWrap();
965 else
966 Inst->setHasNoUnsignedWrap();
967 }
968 return createOverflowTuple(WO, Result,
969 ConstantInt::getFalse(U->getType()));
970 }
971 }
972
973 return nullptr;
974}
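// Illustrative example (editorial sketch, not part of the original source):
//   %wo = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %ov = extractvalue { i32, i1 } %wo, 1
//   %no = xor i1 %ov, true
//   call void @llvm.assume(i1 %no)
// lets the arithmetic be rewritten as  %s = add nsw i32 %a, %b  with the
// overflow flag replaced by false in the returned struct.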
975
976static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
977 Ty = Ty->getScalarType();
978 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
979}
980
981static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
982 Ty = Ty->getScalarType();
983 return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
984}
985
986/// \returns the compare predicate type if the test performed by
987/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
988/// floating-point environment assumed for \p F for type \p Ty
989static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask,
990 const Function &F, Type *Ty) {
991 switch (static_cast<unsigned>(Mask)) {
992 case fcZero:
993 if (inputDenormalIsIEEE(F, Ty))
994 return FCmpInst::FCMP_OEQ;
995 break;
996 case fcZero | fcSubnormal:
997 if (inputDenormalIsDAZ(F, Ty))
998 return FCmpInst::FCMP_OEQ;
999 break;
1000 case fcPositive | fcNegZero:
1001 if (inputDenormalIsIEEE(F, Ty))
1002 return FCmpInst::FCMP_OGE;
1003 break;
1004 case fcPositive | fcNegZero | fcSubnormal:
1005 if (inputDenormalIsDAZ(F, Ty))
1006 return FCmpInst::FCMP_OGE;
1007 break;
1008 case fcPosSubnormal | fcPosNormal | fcPosInf:
1009 if (inputDenormalIsIEEE(F, Ty))
1010 return FCmpInst::FCMP_OGT;
1011 break;
1012 case fcNegative | fcPosZero:
1013 if (inputDenormalIsIEEE(F, Ty))
1014 return FCmpInst::FCMP_OLE;
1015 break;
1016 case fcNegative | fcPosZero | fcSubnormal:
1017 if (inputDenormalIsDAZ(F, Ty))
1018 return FCmpInst::FCMP_OLE;
1019 break;
1020 case fcNegSubnormal | fcNegNormal | fcNegInf:
1021 if (inputDenormalIsIEEE(F, Ty))
1022 return FCmpInst::FCMP_OLT;
1023 break;
1024 case fcPosNormal | fcPosInf:
1025 if (inputDenormalIsDAZ(F, Ty))
1026 return FCmpInst::FCMP_OGT;
1027 break;
1028 case fcNegNormal | fcNegInf:
1029 if (inputDenormalIsDAZ(F, Ty))
1030 return FCmpInst::FCMP_OLT;
1031 break;
1032 case ~fcZero & ~fcNan:
1033 if (inputDenormalIsIEEE(F, Ty))
1034 return FCmpInst::FCMP_ONE;
1035 break;
1036 case ~(fcZero | fcSubnormal) & ~fcNan:
1037 if (inputDenormalIsDAZ(F, Ty))
1038 return FCmpInst::FCMP_ONE;
1039 break;
1040 default:
1041 break;
1042 }
1043
1044 return FCmpInst::BAD_FCMP_PREDICATE;
1045}
1046
1047Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
1048 Value *Src0 = II.getArgOperand(0);
1049 Value *Src1 = II.getArgOperand(1);
1050 const ConstantInt *CMask = cast<ConstantInt>(Src1);
1051 FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
1052 const bool IsUnordered = (Mask & fcNan) == fcNan;
1053 const bool IsOrdered = (Mask & fcNan) == fcNone;
1054 const FPClassTest OrderedMask = Mask & ~fcNan;
1055 const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
1056
1057 const bool IsStrict =
1058 II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
1059
1060 Value *FNegSrc;
1061 if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
1062 // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
1063
1064 II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
1065 return replaceOperand(II, 0, FNegSrc);
1066 }
1067
1068 Value *FAbsSrc;
1069 if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
1070 II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
1071 return replaceOperand(II, 0, FAbsSrc);
1072 }
1073
1074 if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
1075 (IsOrdered || IsUnordered) && !IsStrict) {
1076 // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
1077 // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
1078 // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
1079 // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
1080 Constant *Inf = ConstantFP::getInfinity(Src0->getType());
1081 FCmpInst::Predicate Pred =
1082 IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
1083 if (OrderedInvertedMask == fcInf)
1084 Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
1085
1086 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Src0);
1087 Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
1088 CmpInf->takeName(&II);
1089 return replaceInstUsesWith(II, CmpInf);
1090 }
1091
1092 if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
1093 (IsOrdered || IsUnordered) && !IsStrict) {
1094 // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1095 // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1096 // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
1097 // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
1098 Constant *Inf =
1099 ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
1100 Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
1101 : Builder.CreateFCmpOEQ(Src0, Inf);
1102
1103 EqInf->takeName(&II);
1104 return replaceInstUsesWith(II, EqInf);
1105 }
1106
1107 if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
1108 (IsOrdered || IsUnordered) && !IsStrict) {
1109 // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
1110 // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
1111 // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
1112 // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
1113 Constant *Inf = ConstantFP::getInfinity(Src0->getType(),
1114 OrderedInvertedMask == fcNegInf);
1115 Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
1116 : Builder.CreateFCmpONE(Src0, Inf);
1117 NeInf->takeName(&II);
1118 return replaceInstUsesWith(II, NeInf);
1119 }
1120
1121 if (Mask == fcNan && !IsStrict) {
1122 // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1123 // exceptions.
1124 Value *IsNan =
1125 Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
1126 IsNan->takeName(&II);
1127 return replaceInstUsesWith(II, IsNan);
1128 }
1129
1130 if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1131 // Equivalent of !isnan. Replace with standard fcmp.
1132 Value *FCmp =
1133 Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
1134 FCmp->takeName(&II);
1135 return replaceInstUsesWith(II, FCmp);
1136 }
1137 }
1138 FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE;
1139
1140 // Try to replace with an fcmp with 0
1141 //
1142 // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1143 // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
1144 // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1145 // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1146 //
1147 // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
1148 // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
1149 //
1150 // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
1151 // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
1152 //
1153 if (!IsStrict && (IsOrdered || IsUnordered) &&
1154 (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
1155 Src0->getType())) !=
1156 FCmpInst::BAD_FCMP_PREDICATE) {
1157 Constant *Zero = ConstantFP::getZero(Src0->getType());
1158 // Equivalent of == 0.
1159 Value *FCmp = Builder.CreateFCmp(
1160 IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
1161 Src0, Zero);
1162
1163 FCmp->takeName(&II);
1164 return replaceInstUsesWith(II, FCmp);
1165 }
1166
1167 KnownFPClass Known = computeKnownFPClass(Src0, Mask, &II);
1168
1169 // Clear test bits we know must be false from the source value.
1170 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
1171 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
1172 if ((Mask & Known.KnownFPClasses) != Mask) {
1173 II.setArgOperand(
1174 1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
1175 return &II;
1176 }
1177
1178 // If none of the tests which can return false are possible, fold to true.
1179 // fp_class (nnan x), ~(qnan|snan) -> true
1180 // fp_class (ninf x), ~(ninf|pinf) -> true
1181 if (Mask == Known.KnownFPClasses)
1182 return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
1183
1184 return nullptr;
1185}
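// Illustrative example (editorial sketch, not part of the original source):
// a pure NaN test,
//   %r = call i1 @llvm.is.fpclass.f32(float %x, i32 3)   ; mask == fcNan
// lowers to  %r = fcmp uno float %x, 0.0  when strictfp is not in effect, and
// a mask of fcInf becomes an ordered equality compare of fabs(%x) against
// +infinity, as built above.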
1186
1187static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1188 KnownBits Known = computeKnownBits(Op, SQ);
1189 if (Known.isNonNegative())
1190 return false;
1191 if (Known.isNegative())
1192 return true;
1193
1194 Value *X, *Y;
1195 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1196 return isImpliedByDomCondition(ICmpInst::ICMP_SLT, X, Y, SQ.CxtI, SQ.DL);
1197
1198 return std::nullopt;
1199}
1200
1201static std::optional<bool> getKnownSignOrZero(Value *Op,
1202 const SimplifyQuery &SQ) {
1203 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1204 return Sign;
1205
1206 Value *X, *Y;
1207 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1208 return isImpliedByDomCondition(ICmpInst::ICMP_SLE, X, Y, SQ.CxtI, SQ.DL);
1209
1210 return std::nullopt;
1211}
1212
1213/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1214static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1215 const SimplifyQuery &SQ) {
1216 std::optional<bool> Known1 = getKnownSign(Op1, SQ);
1217 if (!Known1)
1218 return false;
1219 std::optional<bool> Known0 = getKnownSign(Op0, SQ);
1220 if (!Known0)
1221 return false;
1222 return *Known0 == *Known1;
1223}
1224
1225/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1226/// can trigger other combines.
1227static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
1228 InstCombiner::BuilderTy &Builder) {
1229 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1230 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
1231 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
1232 "Expected a min or max intrinsic");
1233
1234 // TODO: Match vectors with undef elements, but undef may not propagate.
1235 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1236 Value *X;
1237 const APInt *C0, *C1;
1238 if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
1239 !match(Op1, m_APInt(C1)))
1240 return nullptr;
1241
1242 // Check for necessary no-wrap and overflow constraints.
1243 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
1244 auto *Add = cast<BinaryOperator>(Op0);
1245 if ((IsSigned && !Add->hasNoSignedWrap()) ||
1246 (!IsSigned && !Add->hasNoUnsignedWrap()))
1247 return nullptr;
1248
1249 // If the constant difference overflows, then instsimplify should reduce the
1250 // min/max to the add or C1.
1251 bool Overflow;
1252 APInt CDiff =
1253 IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
1254 assert(!Overflow && "Expected simplify of min/max");
1255
1256 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1257 // Note: the "mismatched" no-overflow setting does not propagate.
1258 Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
1259 Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
1260 return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
1261 : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
1262}
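// Illustrative example (editorial sketch, not part of the original source):
//   %a = add nuw i8 %x, 10
//   %m = call i8 @llvm.umax.i8(i8 %a, i8 42)
// becomes (42 - 10 == 32)
//   %m1 = call i8 @llvm.umax.i8(i8 %x, i8 32)
//   %m  = add nuw i8 %m1, 10
// which exposes the inner umax to further folds.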
1263/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1264Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1265 Type *Ty = MinMax1.getType();
1266
1267 // We are looking for a tree of:
1268 // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1269 // Where the min and max could be reversed
1270 Instruction *MinMax2;
1271 BinaryOperator *AddSub;
1272 const APInt *MinValue, *MaxValue;
1273 if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
1274 if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
1275 return nullptr;
1276 } else if (match(&MinMax1,
1277 m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
1278 if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
1279 return nullptr;
1280 } else
1281 return nullptr;
1282
1283 // Check that the constants clamp a saturate, and that the new type would be
1284 // sensible to convert to.
1285 if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
1286 return nullptr;
1287 // In what bitwidth can this be treated as saturating arithmetic?
1288 unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
1289 // FIXME: This isn't quite right for vectors, but using the scalar type is a
1290 // good first approximation for what should be done there.
1291 if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
1292 return nullptr;
1293
1294 // Also make sure that the inner min/max and the add/sub have one use.
1295 if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
1296 return nullptr;
1297
1298 // Create the new type (which can be a vector type)
1299 Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1300
1301 Intrinsic::ID IntrinsicID;
1302 if (AddSub->getOpcode() == Instruction::Add)
1303 IntrinsicID = Intrinsic::sadd_sat;
1304 else if (AddSub->getOpcode() == Instruction::Sub)
1305 IntrinsicID = Intrinsic::ssub_sat;
1306 else
1307 return nullptr;
1308
1309 // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1310 // is usually achieved via a sext from a smaller type.
1311 if (ComputeMaxSignificantBits(AddSub->getOperand(0), AddSub) > NewBitWidth ||
1312 ComputeMaxSignificantBits(AddSub->getOperand(1), AddSub) > NewBitWidth)
1313 return nullptr;
1314
1315 // Finally create and return the sat intrinsic, truncated to the new type
1316 Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
1317 Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
1318 Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
1319 return CastInst::Create(Instruction::SExt, Sat, Ty);
1320}
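// Illustrative example (editorial sketch, not part of the original source):
// the classic clamp-to-i8 pattern
//   %a  = add i32 %sa, %sb        ; %sa, %sb sign-extended from i8
//   %lo = call i32 @llvm.smax.i32(i32 %a, i32 -128)
//   %hi = call i32 @llvm.smin.i32(i32 %lo, i32 127)
// is recognized as an 8-bit saturating add and becomes
//   %s = call i8 @llvm.sadd.sat.i8(i8 %ta, i8 %tb)   ; truncated operands
//   %r = sext i8 %s to i32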
1321
1322
1323/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1324/// can only be one of two possible constant values -- turn that into a select
1325/// of constants.
1326static Value *foldClampRangeOfTwo(IntrinsicInst *II,
1327 InstCombiner::BuilderTy &Builder) {
1328 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1329 Value *X;
1330 const APInt *C0, *C1;
1331 if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
1332 return nullptr;
1333
1335 switch (II->getIntrinsicID()) {
1336 case Intrinsic::smax:
1337 if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1338 Pred = ICmpInst::ICMP_SGT;
1339 break;
1340 case Intrinsic::smin:
1341 if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1342 Pred = ICmpInst::ICMP_SLT;
1343 break;
1344 case Intrinsic::umax:
1345 if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1346 Pred = ICmpInst::ICMP_UGT;
1347 break;
1348 case Intrinsic::umin:
1349 if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1350 Pred = ICmpInst::ICMP_ULT;
1351 break;
1352 default:
1353 llvm_unreachable("Expected min/max intrinsic");
1354 }
1355 if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1356 return nullptr;
1357
1358 // max (min X, 42), 41 --> X > 41 ? 42 : 41
1359 // min (max X, 42), 43 --> X < 43 ? 42 : 43
1360 Value *Cmp = Builder.CreateICmp(Pred, X, I1);
1361 return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
1362}
1363
1364/// If this min/max has a constant operand and an operand that is a matching
1365/// min/max with a constant operand, constant-fold the 2 constant operands.
1366static Value *reassociateMinMaxWithConstants(IntrinsicInst *II,
1367 IRBuilderBase &Builder,
1368 const SimplifyQuery &SQ) {
1369 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1370 auto *LHS = dyn_cast<MinMaxIntrinsic>(II->getArgOperand(0));
1371 if (!LHS)
1372 return nullptr;
1373
1374 Constant *C0, *C1;
1375 if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
1376 !match(II->getArgOperand(1), m_ImmConstant(C1)))
1377 return nullptr;
1378
1379 // max (max X, C0), C1 --> max X, (max C0, C1)
1380 // min (min X, C0), C1 --> min X, (min C0, C1)
1381 // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1382 // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
1383 Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
1384 if (InnerMinMaxID != MinMaxID &&
1385 !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
1386 (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1387 isKnownNonNegative(C0, SQ) && isKnownNonNegative(C1, SQ)))
1388 return nullptr;
1389
1390 ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
1389
1391 Value *CondC = Builder.CreateICmp(Pred, C0, C1);
1392 Value *NewC = Builder.CreateSelect(CondC, C0, C1);
1393 return Builder.CreateIntrinsic(InnerMinMaxID, II->getType(),
1394 {LHS->getArgOperand(0), NewC});
1395}
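// Illustrative example (editorial sketch, not part of the original source):
//   %i = call i32 @llvm.smin.i32(i32 %x, i32 7)
//   %o = call i32 @llvm.smin.i32(i32 %i, i32 11)
// constant-folds the two bounds (smin of 7 and 11 is 7) into
//   %o = call i32 @llvm.smin.i32(i32 %x, i32 7)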
1396
1397/// If this min/max has a matching min/max operand with a constant, try to push
1398/// the constant operand into this instruction. This can enable more folds.
1399static Instruction *
1400reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
1401 InstCombiner::BuilderTy &Builder) {
1402 // Match and capture a min/max operand candidate.
1403 Value *X, *Y;
1404 Constant *C;
1405 Instruction *Inner;
1406 if (!match(II, m_c_MaxOrMin(m_OneUse(m_CombineAnd(
1407 m_Instruction(Inner),
1408 m_MaxOrMin(m_Value(X), m_ImmConstant(C)))),
1409 m_Value(Y))))
1410 return nullptr;
1411
1412 // The inner op must match. Check for constants to avoid infinite loops.
1413 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1414 auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
1415 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
1416 match(X, m_ImmConstant()) || match(Y, m_ImmConstant()))
1417 return nullptr;
1418
1419 // max (max X, C), Y --> max (max X, Y), C
1420 Function *MinMax = Intrinsic::getOrInsertDeclaration(II->getModule(),
1421 MinMaxID, II->getType());
1422 Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
1423 NewInner->takeName(Inner);
1424 return CallInst::Create(MinMax, {NewInner, C});
1425}
1426
1427/// Reduce a sequence of min/max intrinsics with a common operand.
1428static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
1429 // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1430 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1431 auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
1432 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1433 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1434 RHS->getIntrinsicID() != MinMaxID ||
1435 (!LHS->hasOneUse() && !RHS->hasOneUse()))
1436 return nullptr;
1437
1438 Value *A = LHS->getArgOperand(0);
1439 Value *B = LHS->getArgOperand(1);
1440 Value *C = RHS->getArgOperand(0);
1441 Value *D = RHS->getArgOperand(1);
1442
1443 // Look for a common operand.
1444 Value *MinMaxOp = nullptr;
1445 Value *ThirdOp = nullptr;
1446 if (LHS->hasOneUse()) {
1447 // If the LHS is only used in this chain and the RHS is used outside of it,
1448 // reuse the RHS min/max because that will eliminate the LHS.
1449 if (D == A || C == A) {
1450 // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1451 // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1452 MinMaxOp = RHS;
1453 ThirdOp = B;
1454 } else if (D == B || C == B) {
1455 // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1456 // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1457 MinMaxOp = RHS;
1458 ThirdOp = A;
1459 }
1460 } else {
1461 assert(RHS->hasOneUse() && "Expected one-use operand");
1462 // Reuse the LHS. This will eliminate the RHS.
1463 if (D == A || D == B) {
1464 // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1465 // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1466 MinMaxOp = LHS;
1467 ThirdOp = C;
1468 } else if (C == A || C == B) {
1469 // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1470 // min(min(a, b), min(c, b)) --> min(min(a, b), d)
1471 MinMaxOp = LHS;
1472 ThirdOp = D;
1473 }
1474 }
1475
1476 if (!MinMaxOp || !ThirdOp)
1477 return nullptr;
1478
1479 Module *Mod = II->getModule();
1480 Function *MinMax =
1481 Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
1482 return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1483}
1484
1485/// If all arguments of the intrinsic are unary shuffles with the same mask,
1486/// try to shuffle after the intrinsic.
1489 if (!isTriviallyVectorizable(II->getIntrinsicID()) ||
1490 !II->getCalledFunction()->isSpeculatable())
1491 return nullptr;
1492
1493 Value *X;
1494 Constant *C;
1495 ArrayRef<int> Mask;
1496 auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1497 return isa<Constant>(Arg.get()) ||
1498 isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1499 Arg.getOperandNo(), nullptr);
1500 });
1501 if (!NonConstArg ||
1502 !match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
1503 return nullptr;
1504
1505 // At least 1 operand must be a shuffle with 1 use because we are creating 2
1506 // instructions.
1507 if (none_of(II->args(), match_fn(m_OneUse(m_Shuffle(m_Value(), m_Value())))))
1508 return nullptr;
1509
1510 // See if all arguments are shuffled with the same mask.
1511 SmallVector<Value *> NewArgs;
1512 Type *SrcTy = X->getType();
1513 for (Use &Arg : II->args()) {
1514 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1515 Arg.getOperandNo(), nullptr))
1516 NewArgs.push_back(Arg);
1517 else if (match(&Arg,
1518 m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1519 X->getType() == SrcTy)
1520 NewArgs.push_back(X);
1521 else if (match(&Arg, m_ImmConstant(C))) {
1522 // If it's a constant, try find the constant that would be shuffled to C.
1523 if (Constant *ShuffledC =
1524 unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
1525 NewArgs.push_back(ShuffledC);
1526 else
1527 return nullptr;
1528 } else
1529 return nullptr;
1530 }
1531
1532 // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
1533 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1534 // Result type might be a different vector width.
1535 // TODO: Check that the result type isn't widened?
1536 VectorType *ResTy =
1537 VectorType::get(II->getType()->getScalarType(), cast<VectorType>(SrcTy));
1538 Value *NewIntrinsic =
1539 Builder.CreateIntrinsic(ResTy, II->getIntrinsicID(), NewArgs, FPI);
1540 return new ShuffleVectorInst(NewIntrinsic, Mask);
1541}
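// Illustrative example (editorial sketch, not part of the original source):
// when every operand of a speculatable vector intrinsic is shuffled with the
// same mask,
//   %sx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %sy = shufflevector <4 x i32> %y, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %m  = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %sx, <4 x i32> %sy)
// the shuffle is hoisted past the call:
//   %u = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y)
//   %m = shufflevector <4 x i32> %u, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>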
1542
1543/// If all arguments of the intrinsic are reverses, try to pull the reverse
1544/// after the intrinsic.
1546 if (!isTriviallyVectorizable(II->getIntrinsicID()))
1547 return nullptr;
1548
1549 // At least 1 operand must be a reverse with 1 use because we are creating 2
1550 // instructions.
1551 if (none_of(II->args(), [](Value *V) {
1552 return match(V, m_OneUse(m_VecReverse(m_Value())));
1553 }))
1554 return nullptr;
1555
1556 Value *X;
1557 Constant *C;
1558 SmallVector<Value *> NewArgs;
1559 for (Use &Arg : II->args()) {
1560 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1561 Arg.getOperandNo(), nullptr))
1562 NewArgs.push_back(Arg);
1563 else if (match(&Arg, m_VecReverse(m_Value(X))))
1564 NewArgs.push_back(X);
1565 else if (isSplatValue(Arg))
1566 NewArgs.push_back(Arg);
1567 else if (match(&Arg, m_ImmConstant(C)))
1568 NewArgs.push_back(Builder.CreateVectorReverse(C));
1569 else
1570 return nullptr;
1571 }
1572
1573 // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
1574 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1575 Instruction *NewIntrinsic = Builder.CreateIntrinsic(
1576 II->getType(), II->getIntrinsicID(), NewArgs, FPI);
1577 return Builder.CreateVectorReverse(NewIntrinsic);
1578}
1579
1580 /// Fold the following cases (accepting bswap and bitreverse intrinsics):
1581/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1582/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
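/// For example (a sketch using i32 and xor; any bitwise logic op behaves the
/// same way):
///   %bx = call i32 @llvm.bswap.i32(i32 %x)
///   %l  = xor i32 %bx, %y
///   %r  = call i32 @llvm.bswap.i32(i32 %l)
/// can be rewritten as
///   %by = call i32 @llvm.bswap.i32(i32 %y)
///   %r  = xor i32 %x, %by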
1583template <Intrinsic::ID IntrID>
1585 InstCombiner::BuilderTy &Builder) {
1586 static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
1587 "This helper only supports BSWAP and BITREVERSE intrinsics");
1588
1589 Value *X, *Y;
1590 // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1591 // don't match ConstantExpr that aren't meaningful for this transform.
1594 Value *OldReorderX, *OldReorderY;
1596
1597 // If both X and Y are bswap/bitreverse, the transform reduces the number
1598 // of instructions even if there's multiuse.
1599 // If only one operand is bswap/bitreverse, we need to ensure the operand
1600 // has only one use.
1601 if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
1602 match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
1603 return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
1604 }
1605
1606 if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
1607 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
1608 return BinaryOperator::Create(Op, OldReorderX, NewReorder);
1609 }
1610
1611 if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
1612 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
1613 return BinaryOperator::Create(Op, NewReorder, OldReorderY);
1614 }
1615 }
1616 return nullptr;
1617}
1618
1619/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1620/// `f(f(x, y), y) == f(x, y)` holds.
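/// For example, umax qualifies: umax(umax(x, y), y) == umax(x, y) for all x
/// and y.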
1622 switch (IID) {
1623 case Intrinsic::smax:
1624 case Intrinsic::smin:
1625 case Intrinsic::umax:
1626 case Intrinsic::umin:
1627 case Intrinsic::maximum:
1628 case Intrinsic::minimum:
1629 case Intrinsic::maximumnum:
1630 case Intrinsic::minimumnum:
1631 case Intrinsic::maxnum:
1632 case Intrinsic::minnum:
1633 return true;
1634 default:
1635 return false;
1636 }
1637}
1638
1639/// Attempt to simplify value-accumulating recurrences of kind:
1640/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1641/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1642 /// and hoist the idempotent binary intrinsic when the operands are known to
1643 /// be loop-invariant.
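/// With %a and %b loop-invariant, the recurrence computes the same value from
/// the first iteration onwards, so it can be replaced by a single
/// loop-invariant call (an illustrative sketch; %inv is an arbitrary name):
///   %inv = call i8 @llvm.umax.i8(i8 %a, i8 %b)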
1645 IntrinsicInst *II) {
1646 PHINode *PN;
1647 Value *Init, *OtherOp;
1648
1649 // A binary intrinsic recurrence with loop-invariant operands is equivalent to
1650 // `call @llvm.binary.intrinsic(Init, OtherOp)`.
1651 auto IID = II->getIntrinsicID();
1652 if (!isIdempotentBinaryIntrinsic(IID) ||
1654 !IC.getDominatorTree().dominates(OtherOp, PN))
1655 return nullptr;
1656
1657 auto *InvariantBinaryInst =
1658 IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
1659 if (isa<FPMathOperator>(InvariantBinaryInst))
1660 cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
1661 return InvariantBinaryInst;
1662}
1663
1664static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1665 if (!CanReorderLanes)
1666 return nullptr;
1667
1668 Value *V;
1669 if (match(Arg, m_VecReverse(m_Value(V))))
1670 return V;
1671
1672 ArrayRef<int> Mask;
1673 if (!isa<FixedVectorType>(Arg->getType()) ||
1674 !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
1675 !cast<ShuffleVectorInst>(Arg)->isSingleSource())
1676 return nullptr;
1677
1678 int Sz = Mask.size();
1679 SmallBitVector UsedIndices(Sz);
1680 for (int Idx : Mask) {
1681 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1682 return nullptr;
1683 UsedIndices.set(Idx);
1684 }
1685
1686 // The shuffle can be removed iff it only permutes the elements, with no
1687 // repeats, undefs, or other changes.
1688 return UsedIndices.all() ? V : nullptr;
1689}
1690
1691/// Fold an unsigned minimum of trailing or leading zero bits counts:
1692/// umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
1693/// umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignedMin
1694/// >> ConstOp))
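/// For example, with i32 and ConstOp == 4 (values chosen for illustration):
///   umin(cttz(%x, %zu), 4) --> cttz(%x | 16, true)
/// because or'ing in bit 4 (1 << 4) caps the trailing-zero count at 4 without
/// changing any smaller count, and %x | 16 is never zero, so the is-zero-poison
/// flag can be set.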
1695template <Intrinsic::ID IntrID>
1696static Value *
1698 const DataLayout &DL,
1699 InstCombiner::BuilderTy &Builder) {
1700 static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1701 "This helper only supports cttz and ctlz intrinsics");
1702
1703 Value *CtOp;
1704 Value *ZeroUndef;
1705 if (!match(I0,
1706 m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp), m_Value(ZeroUndef)))))
1707 return nullptr;
1708
1709 unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1710 auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1711 if (!match(I1, m_CheckedInt(LessBitWidth)))
1712 // We have a constant >= BitWidth (which can be handled by CVP)
1713 // or a non-splat vector with elements < and >= BitWidth
1714 return nullptr;
1715
1716 Type *Ty = I1->getType();
1718 IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1719 IntrID == Intrinsic::cttz
1720 ? ConstantInt::get(Ty, 1)
1721 : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1722 cast<Constant>(I1), DL);
1723 return Builder.CreateBinaryIntrinsic(
1724 IntrID, Builder.CreateOr(CtOp, NewConst),
1725 ConstantInt::getTrue(ZeroUndef->getType()));
1726}
1727
1728/// Return whether "X LOp (Y ROp Z)" is always equal to
1729/// "(X LOp Y) ROp (X LOp Z)".
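/// For example, with LOp == add nuw and ROp == umin:
///   X + umin(Y, Z) == umin(X + Y, X + Z)
/// because adding the same value with no unsigned wrap preserves the unsigned
/// order of Y and Z.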
1731 bool HasNSW, Intrinsic::ID ROp) {
1732 switch (ROp) {
1733 case Intrinsic::umax:
1734 case Intrinsic::umin:
1735 if (HasNUW && LOp == Instruction::Add)
1736 return true;
1737 if (HasNUW && LOp == Instruction::Shl)
1738 return true;
1739 return false;
1740 case Intrinsic::smax:
1741 case Intrinsic::smin:
1742 return HasNSW && LOp == Instruction::Add;
1743 default:
1744 return false;
1745 }
1746}
1747
1748 // Attempts to factorise a common term
1749 // in an instruction that has the form "(A op' B) op (C op' D)",
1750 // where op is an intrinsic and op' is a binop.
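// For example (an illustrative sketch; both adds are assumed to be one-use
// and nuw so that the distributive-law check succeeds):
//   umax((add nuw %a, %b), (add nuw %a, %d)) --> add nuw %a, umax(%b, %d)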
1751static Value *
1753 InstCombiner::BuilderTy &Builder) {
1754 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1755 Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1756
1759
1760 if (!Op0 || !Op1)
1761 return nullptr;
1762
1763 if (Op0->getOpcode() != Op1->getOpcode())
1764 return nullptr;
1765
1766 if (!Op0->hasOneUse() || !Op1->hasOneUse())
1767 return nullptr;
1768
1769 Instruction::BinaryOps InnerOpcode =
1770 static_cast<Instruction::BinaryOps>(Op0->getOpcode());
1771 bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1772 bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1773
1774 if (!leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode))
1775 return nullptr;
1776
1777 Value *A = Op0->getOperand(0);
1778 Value *B = Op0->getOperand(1);
1779 Value *C = Op1->getOperand(0);
1780 Value *D = Op1->getOperand(1);
1781
1782 // Attempts to swap variables such that A equals C or B equals D,
1783 // if the inner operation is commutative.
1784 if (Op0->isCommutative() && A != C && B != D) {
1785 if (A == D || B == C)
1786 std::swap(C, D);
1787 else
1788 return nullptr;
1789 }
1790
1791 BinaryOperator *NewBinop;
1792 if (A == C) {
1793 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
1794 NewBinop =
1795 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, A, NewIntrinsic));
1796 } else if (B == D) {
1797 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
1798 NewBinop =
1799 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
1800 } else {
1801 return nullptr;
1802 }
1803
1804 NewBinop->setHasNoUnsignedWrap(HasNUW);
1805 NewBinop->setHasNoSignedWrap(HasNSW);
1806
1807 return NewBinop;
1808}
1809
1811 Value *Arg0 = II->getArgOperand(0);
1812 auto *ShiftConst = dyn_cast<Constant>(II->getArgOperand(1));
1813 if (!ShiftConst)
1814 return nullptr;
1815
1816 int ElemBits = Arg0->getType()->getScalarSizeInBits();
1817 bool AllPositive = true;
1818 bool AllNegative = true;
1819
1820 auto Check = [&](Constant *C) -> bool {
1821 if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
1822 const APInt &V = CI->getValue();
1823 if (V.isNonNegative()) {
1824 AllNegative = false;
1825 return AllPositive && V.ult(ElemBits);
1826 }
1827 AllPositive = false;
1828 return AllNegative && V.sgt(-ElemBits);
1829 }
1830 return false;
1831 };
1832
1833 if (auto *VTy = dyn_cast<FixedVectorType>(Arg0->getType())) {
1834 for (unsigned I = 0, E = VTy->getNumElements(); I < E; ++I) {
1835 if (!Check(ShiftConst->getAggregateElement(I)))
1836 return nullptr;
1837 }
1838
1839 } else if (!Check(ShiftConst))
1840 return nullptr;
1841
1842 IRBuilderBase &B = IC.Builder;
1843 if (AllPositive)
1844 return IC.replaceInstUsesWith(*II, B.CreateShl(Arg0, ShiftConst));
1845
1846 Value *NegAmt = B.CreateNeg(ShiftConst);
1847 Intrinsic::ID IID = II->getIntrinsicID();
1848 const bool IsSigned =
1849 IID == Intrinsic::arm_neon_vshifts || IID == Intrinsic::aarch64_neon_sshl;
1850 Value *Result =
1851 IsSigned ? B.CreateAShr(Arg0, NegAmt) : B.CreateLShr(Arg0, NegAmt);
1852 return IC.replaceInstUsesWith(*II, Result);
1853}
1854
1855/// CallInst simplification. This mostly only handles folding of intrinsic
1856/// instructions. For normal calls, it allows visitCallBase to do the heavy
1857/// lifting.
1859 // Don't try to simplify calls without uses. It will not do anything useful,
1860 // but will result in the following folds being skipped.
1861 if (!CI.use_empty()) {
1862 SmallVector<Value *, 8> Args(CI.args());
1863 if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
1864 SQ.getWithInstruction(&CI)))
1865 return replaceInstUsesWith(CI, V);
1866 }
1867
1868 if (Value *FreedOp = getFreedOperand(&CI, &TLI))
1869 return visitFree(CI, FreedOp);
1870
1871 // If the caller function (i.e. us, the function that contains this CallInst)
1872 // is nounwind, mark the call as nounwind, even if the callee isn't.
1873 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1874 CI.setDoesNotThrow();
1875 return &CI;
1876 }
1877
1878 IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
1879 if (!II)
1880 return visitCallBase(CI);
1881
1882 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1883 // instead of in visitCallBase.
1884 if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1885 if (auto NumBytes = MI->getLengthInBytes()) {
1886 // memmove/cpy/set of zero bytes is a noop.
1887 if (NumBytes->isZero())
1888 return eraseInstFromFunction(CI);
1889
1890 // For atomic unordered mem intrinsics, if the length is not positive or
1891 // not a multiple of the element size, then the behavior is undefined.
1892 if (MI->isAtomic() &&
1893 (NumBytes->isNegative() ||
1894 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1896 assert(MI->getType()->isVoidTy() &&
1897 "non void atomic unordered mem intrinsic");
1898 return eraseInstFromFunction(*MI);
1899 }
1900 }
1901
1902 // No other transformations apply to volatile transfers.
1903 if (MI->isVolatile())
1904 return nullptr;
1905
1907 // memmove(x,x,size) -> noop.
1908 if (MTI->getSource() == MTI->getDest())
1909 return eraseInstFromFunction(CI);
1910 }
1911
1912 auto IsPointerUndefined = [MI](Value *Ptr) {
1913 return isa<ConstantPointerNull>(Ptr) &&
1915 MI->getFunction(),
1916 cast<PointerType>(Ptr->getType())->getAddressSpace());
1917 };
1918 bool SrcIsUndefined = false;
1919 // If we can determine a pointer alignment that is bigger than currently
1920 // set, update the alignment.
1921 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1923 return I;
1924 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1925 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1926 if (Instruction *I = SimplifyAnyMemSet(MSI))
1927 return I;
1928 }
1929
1930 // If src/dest is null, this memory intrinsic must be a noop.
1931 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1932 Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
1933 return eraseInstFromFunction(CI);
1934 }
1935
1936 // If we have a memmove and the source operation is a constant global,
1937 // then the source and dest pointers can't alias, so we can change this
1938 // into a call to memcpy.
1939 if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1940 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1941 if (GVSrc->isConstant()) {
1942 Module *M = CI.getModule();
1943 Intrinsic::ID MemCpyID =
1944 MMI->isAtomic()
1945 ? Intrinsic::memcpy_element_unordered_atomic
1946 : Intrinsic::memcpy;
1947 Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1948 CI.getArgOperand(1)->getType(),
1949 CI.getArgOperand(2)->getType() };
1951 Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
1952 return II;
1953 }
1954 }
1955 }
1956
1957 // For fixed width vector result intrinsics, use the generic demanded vector
1958 // support.
1959 if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
1960 auto VWidth = IIFVTy->getNumElements();
1961 APInt PoisonElts(VWidth, 0);
1962 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1963 if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
1964 if (V != II)
1965 return replaceInstUsesWith(*II, V);
1966 return II;
1967 }
1968 }
1969
1970 if (II->isCommutative()) {
1971 if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) {
1972 replaceOperand(*II, 0, Pair->first);
1973 replaceOperand(*II, 1, Pair->second);
1974 return II;
1975 }
1976
1977 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
1978 return NewCall;
1979 }
1980
1981 // Unused constrained FP intrinsic calls may have a declared side effect, which
1982 // prevents them from being removed. In some cases, however, the side effect is
1983 // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
1984 // returns a replacement, the call may be removed.
1985 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
1986 if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
1987 return eraseInstFromFunction(CI);
1988 }
1989
1990 Intrinsic::ID IID = II->getIntrinsicID();
1991 switch (IID) {
1992 case Intrinsic::objectsize: {
1993 SmallVector<Instruction *> InsertedInstructions;
1994 if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
1995 &InsertedInstructions)) {
1996 for (Instruction *Inserted : InsertedInstructions)
1997 Worklist.add(Inserted);
1998 return replaceInstUsesWith(CI, V);
1999 }
2000 return nullptr;
2001 }
2002 case Intrinsic::abs: {
2003 Value *IIOperand = II->getArgOperand(0);
2004 bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
2005
2006 // abs(-x) -> abs(x)
2007 Value *X;
2008 if (match(IIOperand, m_Neg(m_Value(X)))) {
2009 if (cast<Instruction>(IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
2010 replaceOperand(*II, 1, Builder.getTrue());
2011 return replaceOperand(*II, 0, X);
2012 }
2013 if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
2014 return replaceOperand(*II, 0, X);
2015
2016 Value *Y;
2017 // abs(a * abs(b)) -> abs(a * b)
2018 if (match(IIOperand,
2021 bool NSW =
2022 cast<Instruction>(IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
2023 auto *XY = NSW ? Builder.CreateNSWMul(X, Y) : Builder.CreateMul(X, Y);
2024 return replaceOperand(*II, 0, XY);
2025 }
2026
2027 if (std::optional<bool> Known =
2028 getKnownSignOrZero(IIOperand, SQ.getWithInstruction(II))) {
2029 // abs(x) -> x if x >= 0 (includes abs(x-y) --> x - y where x >= y)
2030 // abs(x) -> x if x > 0 (includes abs(x-y) --> x - y where x > y)
2031 if (!*Known)
2032 return replaceInstUsesWith(*II, IIOperand);
2033
2034 // abs(x) -> -x if x < 0
2035 // abs(x) -> -x if x <= 0 (includes abs(x-y) --> y - x where x <= y)
2036 if (IntMinIsPoison)
2037 return BinaryOperator::CreateNSWNeg(IIOperand);
2038 return BinaryOperator::CreateNeg(IIOperand);
2039 }
2040
2041 // abs (sext X) --> zext (abs X*)
2042 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
2043 if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
2044 Value *NarrowAbs =
2045 Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
2046 return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
2047 }
2048
2049 // Match a complicated way to check if a number is odd/even:
2050 // abs (srem X, 2) --> and X, 1
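// (srem X, 2 is -1, 0, or 1 and has the same parity as X, so its absolute
// value is exactly the low bit of X.)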
2051 const APInt *C;
2052 if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
2053 return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
2054
2055 break;
2056 }
2057 case Intrinsic::umin: {
2058 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2059 // umin(x, 1) == zext(x != 0)
2060 if (match(I1, m_One())) {
2061 assert(II->getType()->getScalarSizeInBits() != 1 &&
2062 "Expected simplify of umin with max constant");
2063 Value *Zero = Constant::getNullValue(I0->getType());
2064 Value *Cmp = Builder.CreateICmpNE(I0, Zero);
2065 return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
2066 }
2067 // umin(cttz(x), const) --> cttz(x | (1 << const))
2068 if (Value *FoldedCttz =
2070 I0, I1, DL, Builder))
2071 return replaceInstUsesWith(*II, FoldedCttz);
2072 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
2073 if (Value *FoldedCtlz =
2075 I0, I1, DL, Builder))
2076 return replaceInstUsesWith(*II, FoldedCtlz);
2077 [[fallthrough]];
2078 }
2079 case Intrinsic::umax: {
2080 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2081 Value *X, *Y;
2082 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
2083 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2084 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2085 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2086 }
2087 Constant *C;
2088 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2089 I0->hasOneUse()) {
2090 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType(), DL)) {
2091 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2092 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2093 }
2094 }
2095 // If C is not 0:
2096 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
2097 // If C is not 0 or 1:
2098 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
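// (Reasoning: when x != 0, nuw_shl(x, C) and nuw_mul(x, C) are at least 2*x
// without wrapping, hence >= x + 1; when x == 0 they are 0 while x + 1 == 1,
// which the select handles.)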
2099 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
2100 const APInt *C;
2101 Value *X;
2102 if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
2103 !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
2104 return nullptr;
2105 if (C->isZero())
2106 return nullptr;
2107 if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
2108 return nullptr;
2109
2110 Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
2111 Value *NewSelect =
2112 Builder.CreateSelect(Cmp, ConstantInt::get(X->getType(), 1), A);
2113 return replaceInstUsesWith(*II, NewSelect);
2114 };
2115
2116 if (IID == Intrinsic::umax) {
2117 if (Instruction *I = foldMaxMulShift(I0, I1))
2118 return I;
2119 if (Instruction *I = foldMaxMulShift(I1, I0))
2120 return I;
2121 }
2122
2123 // If both operands of unsigned min/max are sign-extended, it is still ok
2124 // to narrow the operation.
2125 [[fallthrough]];
2126 }
2127 case Intrinsic::smax:
2128 case Intrinsic::smin: {
2129 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2130 Value *X, *Y;
2131 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
2132 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2133 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2134 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2135 }
2136
2137 Constant *C;
2138 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2139 I0->hasOneUse()) {
2140 if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType(), DL)) {
2141 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2142 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2143 }
2144 }
2145
2146 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2147 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
2148 const APInt *MinC, *MaxC;
2149 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2150 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2151 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2152 Value *NewMax = Builder.CreateBinaryIntrinsic(
2153 MaxIID, X, ConstantInt::get(X->getType(), *MaxC));
2154 return replaceInstUsesWith(
2155 *II, Builder.CreateBinaryIntrinsic(
2156 MinIID, NewMax, ConstantInt::get(X->getType(), *MinC)));
2157 };
2158 if (IID == Intrinsic::smax &&
2160 m_APInt(MinC)))) &&
2161 match(I1, m_APInt(MaxC)) && MinC->sgt(*MaxC))
2162 return CreateCanonicalClampForm(true);
2163 if (IID == Intrinsic::umax &&
2165 m_APInt(MinC)))) &&
2166 match(I1, m_APInt(MaxC)) && MinC->ugt(*MaxC))
2167 return CreateCanonicalClampForm(false);
2168
2169 // umin(i1 X, i1 Y) -> and i1 X, Y
2170 // smax(i1 X, i1 Y) -> and i1 X, Y
2171 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2172 II->getType()->isIntOrIntVectorTy(1)) {
2173 return BinaryOperator::CreateAnd(I0, I1);
2174 }
2175
2176 // umax(i1 X, i1 Y) -> or i1 X, Y
2177 // smin(i1 X, i1 Y) -> or i1 X, Y
2178 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2179 II->getType()->isIntOrIntVectorTy(1)) {
2180 return BinaryOperator::CreateOr(I0, I1);
2181 }
2182
2183 // smin(smax(X, -1), 1) -> scmp(X, 0)
2184 // smax(smin(X, 1), -1) -> scmp(X, 0)
2185 // At this point, smax(smin(X, 1), -1) has been changed to smin(smax(X, -1), 1),
2186 // and i1's have been changed to and/ors,
2187 // so we only need to check for smin.
2188 if (IID == Intrinsic::smin) {
2189 if (match(I0, m_OneUse(m_SMax(m_Value(X), m_AllOnes()))) &&
2190 match(I1, m_One())) {
2191 Value *Zero = ConstantInt::get(X->getType(), 0);
2192 return replaceInstUsesWith(
2193 CI,
2194 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {X, Zero}));
2195 }
2196 }
2197
2198 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2199 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2200 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2201 // TODO: Canonicalize neg after min/max if I1 is constant.
2202 if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
2203 (I0->hasOneUse() || I1->hasOneUse())) {
2205 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
2206 return BinaryOperator::CreateNSWNeg(InvMaxMin);
2207 }
2208 }
2209
2210 // (umax X, (xor X, Pow2))
2211 // -> (or X, Pow2)
2212 // (umin X, (xor X, Pow2))
2213 // -> (and X, ~Pow2)
2214 // (smax X, (xor X, Pos_Pow2))
2215 // -> (or X, Pos_Pow2)
2216 // (smin X, (xor X, Pos_Pow2))
2217 // -> (and X, ~Pos_Pow2)
2218 // (smax X, (xor X, Neg_Pow2))
2219 // -> (and X, ~Neg_Pow2)
2220 // (smin X, (xor X, Neg_Pow2))
2221 // -> (or X, Neg_Pow2)
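// (X and (xor X, Pow2) differ in exactly one bit, so whichever value has that
// bit set is the unsigned maximum; hence umax sets the bit (or) and umin
// clears it (and-not). For the signed cases the roles swap exactly when the
// flipped bit is the sign bit, i.e. when the power of two is negative.)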
2222 if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
2223 match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
2224 isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
2225 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2226 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2227
2228 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2229 auto KnownSign = getKnownSign(X, SQ.getWithInstruction(II));
2230 if (KnownSign == std::nullopt) {
2231 UseOr = false;
2232 UseAndN = false;
2233 } else if (*KnownSign /* true is Signed. */) {
2234 UseOr ^= true;
2235 UseAndN ^= true;
2236 Type *Ty = I0->getType();
2237 // Negative power of 2 must be IntMin. It's possible to prove
2238 // negative / power of 2 without actually having known bits, so
2239 // just get the value by hand.
2241 Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
2242 }
2243 }
2244 if (UseOr)
2245 return BinaryOperator::CreateOr(I0, X);
2246 else if (UseAndN)
2247 return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
2248 }
2249
2250 // If we can eliminate ~A and Y is free to invert:
2251 // max ~A, Y --> ~(min A, ~Y)
2252 //
2253 // Examples:
2254 // max ~A, ~Y --> ~(min A, Y)
2255 // max ~A, C --> ~(min A, ~C)
2256 // max ~A, (max ~Y, ~Z) --> ~(min A, (min Y, Z))
2257 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2258 Value *A;
2259 if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
2260 !isFreeToInvert(A, A->hasOneUse())) {
2261 if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
2263 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
2264 return BinaryOperator::CreateNot(InvMaxMin);
2265 }
2266 }
2267 return nullptr;
2268 };
2269
2270 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2271 return I;
2272 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2273 return I;
2274
2276 return I;
2277
2278 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
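// (And'ing with a negated power of two just clears the same low bits of both
// operands; that rounding is monotonic in both the signed and unsigned
// orders, so the mask can be applied after the min/max instead.)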
2279 const APInt *RHSC;
2280 if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
2281 match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
2282 return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
2283 ConstantInt::get(II->getType(), *RHSC));
2284
2285 // smax(X, -X) --> abs(X)
2286 // smin(X, -X) --> -abs(X)
2287 // umax(X, -X) --> -abs(X)
2288 // umin(X, -X) --> abs(X)
2289 if (isKnownNegation(I0, I1)) {
2290 // We can choose either operand as the input to abs(), but if we can
2291 // eliminate the only use of a value, that's better for subsequent
2292 // transforms/analysis.
2293 if (I0->hasOneUse() && !I1->hasOneUse())
2294 std::swap(I0, I1);
2295
2296 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2297 // operation and potentially its negation.
2298 bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
2299 Value *Abs = Builder.CreateBinaryIntrinsic(
2300 Intrinsic::abs, I0,
2301 ConstantInt::getBool(II->getContext(), IntMinIsPoison));
2302
2303 // We don't have a "nabs" intrinsic, so negate if needed based on the
2304 // max/min operation.
2305 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2306 Abs = Builder.CreateNeg(Abs, "nabs", IntMinIsPoison);
2307 return replaceInstUsesWith(CI, Abs);
2308 }
2309
2311 return Sel;
2312
2313 if (Instruction *SAdd = matchSAddSubSat(*II))
2314 return SAdd;
2315
2316 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2317 return replaceInstUsesWith(*II, NewMinMax);
2318
2320 return R;
2321
2322 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2323 return NewMinMax;
2324
2325 // Try to fold minmax with constant RHS based on range information
2326 if (match(I1, m_APIntAllowPoison(RHSC))) {
2327 ICmpInst::Predicate Pred =
2329 bool IsSigned = MinMaxIntrinsic::isSigned(IID);
2331 I0, IsSigned, SQ.getWithInstruction(II));
2332 if (!LHS_CR.isFullSet()) {
2333 if (LHS_CR.icmp(Pred, *RHSC))
2334 return replaceInstUsesWith(*II, I0);
2335 if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
2336 return replaceInstUsesWith(*II,
2337 ConstantInt::get(II->getType(), *RHSC));
2338 }
2339 }
2340
2342 return replaceInstUsesWith(*II, V);
2343
2344 break;
2345 }
2346 case Intrinsic::scmp: {
2347 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2348 Value *LHS, *RHS;
2349 if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
2350 return replaceInstUsesWith(
2351 CI,
2352 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
2353 break;
2354 }
2355 case Intrinsic::bitreverse: {
2356 Value *IIOperand = II->getArgOperand(0);
2357 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2358 Value *X;
2359 if (match(IIOperand, m_ZExt(m_Value(X))) &&
2360 X->getType()->isIntOrIntVectorTy(1)) {
2361 Type *Ty = II->getType();
2362 APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
2363 return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
2365 }
2366
2367 if (Instruction *crossLogicOpFold =
2369 return crossLogicOpFold;
2370
2371 break;
2372 }
2373 case Intrinsic::bswap: {
2374 Value *IIOperand = II->getArgOperand(0);
2375
2376 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2377 // inverse-shift-of-bswap:
2378 // bswap (shl X, Y) --> lshr (bswap X), Y
2379 // bswap (lshr X, Y) --> shl (bswap X), Y
2380 Value *X, *Y;
2381 if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
2382 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2384 Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
2385 BinaryOperator::BinaryOps InverseShift =
2386 cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
2387 ? Instruction::LShr
2388 : Instruction::Shl;
2389 return BinaryOperator::Create(InverseShift, NewSwap, Y);
2390 }
2391 }
2392
2393 KnownBits Known = computeKnownBits(IIOperand, II);
2394 uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
2395 uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
2396 unsigned BW = Known.getBitWidth();
2397
2398 // bswap(x) -> shift(x) if x has exactly one "active byte"
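// (For example, an i32 known to be of the form 0x0000??00 has LZ = 16 and
// TZ = 8, so BW - LZ - TZ == 8; bswap moves that byte up by LZ - TZ == 8
// bits, i.e. bswap(x) == shl(x, 8).)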
2399 if (BW - LZ - TZ == 8) {
2400 assert(LZ != TZ && "active byte cannot be in the middle");
2401 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2402 return BinaryOperator::CreateNUWShl(
2403 IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
2404 // -> lshr(x) if the "active byte" is in the high part of x
2405 return BinaryOperator::CreateExactLShr(
2406 IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
2407 }
2408
2409 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2410 if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
2411 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2412 Value *CV = ConstantInt::get(X->getType(), C);
2413 Value *V = Builder.CreateLShr(X, CV);
2414 return new TruncInst(V, IIOperand->getType());
2415 }
2416
2417 if (Instruction *crossLogicOpFold =
2419 return crossLogicOpFold;
2420 }
2421
2422 // Try to fold into bitreverse if bswap is the root of the expression tree.
2423 if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
2424 /*MatchBitReversals*/ true))
2425 return BitOp;
2426 break;
2427 }
2428 case Intrinsic::masked_load:
2429 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
2430 return replaceInstUsesWith(CI, SimplifiedMaskedOp);
2431 break;
2432 case Intrinsic::masked_store:
2433 return simplifyMaskedStore(*II);
2434 case Intrinsic::masked_gather:
2435 return simplifyMaskedGather(*II);
2436 case Intrinsic::masked_scatter:
2437 return simplifyMaskedScatter(*II);
2438 case Intrinsic::launder_invariant_group:
2439 case Intrinsic::strip_invariant_group:
2440 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
2441 return replaceInstUsesWith(*II, SkippedBarrier);
2442 break;
2443 case Intrinsic::powi:
2444 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2445 // 0 and 1 are handled in instsimplify
2446 // powi(x, -1) -> 1/x
2447 if (Power->isMinusOne())
2448 return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
2449 II->getArgOperand(0), II);
2450 // powi(x, 2) -> x*x
2451 if (Power->equalsInt(2))
2452 return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
2453 II->getArgOperand(0), II);
2454
2455 if (!Power->getValue()[0]) {
2456 Value *X;
2457 // If power is even:
2458 // powi(-x, p) -> powi(x, p)
2459 // powi(fabs(x), p) -> powi(x, p)
2460 // powi(copysign(x, y), p) -> powi(x, p)
2461 if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
2462 match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
2463 match(II->getArgOperand(0),
2465 return replaceOperand(*II, 0, X);
2466 }
2467 }
2468 break;
2469
2470 case Intrinsic::cttz:
2471 case Intrinsic::ctlz:
2472 if (auto *I = foldCttzCtlz(*II, *this))
2473 return I;
2474 break;
2475
2476 case Intrinsic::ctpop:
2477 if (auto *I = foldCtpop(*II, *this))
2478 return I;
2479 break;
2480
2481 case Intrinsic::fshl:
2482 case Intrinsic::fshr: {
2483 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
2484 Type *Ty = II->getType();
2485 unsigned BitWidth = Ty->getScalarSizeInBits();
2486 Constant *ShAmtC;
2487 if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
2488 // Canonicalize a shift amount constant operand to modulo the bit-width.
2489 Constant *WidthC = ConstantInt::get(Ty, BitWidth);
2490 Constant *ModuloC =
2491 ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
2492 if (!ModuloC)
2493 return nullptr;
2494 if (ModuloC != ShAmtC)
2495 return replaceOperand(*II, 2, ModuloC);
2496
2498 ShAmtC, DL),
2499 m_One()) &&
2500 "Shift amount expected to be modulo bitwidth");
2501
2502 // Canonicalize funnel shift right by constant to funnel shift left. This
2503 // is not entirely arbitrary. For historical reasons, the backend may
2504 // recognize rotate left patterns but miss rotate right patterns.
2505 if (IID == Intrinsic::fshr) {
2506 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2507 if (!isKnownNonZero(ShAmtC, SQ.getWithInstruction(II)))
2508 return nullptr;
2509
2510 Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
2511 Module *Mod = II->getModule();
2512 Function *Fshl =
2513 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
2514 return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
2515 }
2516 assert(IID == Intrinsic::fshl &&
2517 "All funnel shifts by simple constants should go left");
2518
2519 // fshl(X, 0, C) --> shl X, C
2520 // fshl(X, undef, C) --> shl X, C
2521 if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
2522 return BinaryOperator::CreateShl(Op0, ShAmtC);
2523
2524 // fshl(0, X, C) --> lshr X, (BW-C)
2525 // fshl(undef, X, C) --> lshr X, (BW-C)
2526 if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
2527 return BinaryOperator::CreateLShr(Op1,
2528 ConstantExpr::getSub(WidthC, ShAmtC));
2529
2530 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2531 if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
2532 Module *Mod = II->getModule();
2533 Function *Bswap =
2534 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
2535 return CallInst::Create(Bswap, { Op0 });
2536 }
2537 if (Instruction *BitOp =
2538 matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
2539 /*MatchBitReversals*/ true))
2540 return BitOp;
2541
2542 // R = fshl(X, X, C2)
2543 // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
2544 Value *InnerOp;
2545 const APInt *ShAmtInnerC, *ShAmtOuterC;
2546 if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
2547 m_APInt(ShAmtInnerC))) &&
2548 match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) {
2549 APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
2550 APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
2551 if (Modulo.isZero())
2552 return replaceInstUsesWith(*II, InnerOp);
2553 Constant *ModuloC = ConstantInt::get(Ty, Modulo);
2555 {InnerOp, InnerOp, ModuloC});
2556 }
2557 }
2558
2559 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2560 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2561 // if BitWidth is a power-of-2
2562 Value *Y;
2563 if (Op0 == Op1 && isPowerOf2_32(BitWidth) &&
2564 match(II->getArgOperand(2), m_Neg(m_Value(Y)))) {
2565 Module *Mod = II->getModule();
2567 Mod, IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Ty);
2568 return CallInst::Create(OppositeShift, {Op0, Op1, Y});
2569 }
2570
2571 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2572 // power-of-2
2573 if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
2574 match(Op1, m_ZeroInt())) {
2575 Value *Op2 = II->getArgOperand(2);
2576 Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
2577 return BinaryOperator::CreateShl(Op0, And);
2578 }
2579
2580 // Left or right might be masked.
2582 return &CI;
2583
2584 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2585 // so only the low bits of the shift amount are demanded if the bitwidth is
2586 // a power-of-2.
2587 if (!isPowerOf2_32(BitWidth))
2588 break;
2590 KnownBits Op2Known(BitWidth);
2591 if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
2592 return &CI;
2593 break;
2594 }
2595 case Intrinsic::ptrmask: {
2596 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2597 KnownBits Known(BitWidth);
2599 return II;
2600
2601 Value *InnerPtr, *InnerMask;
2602 bool Changed = false;
2603 // Combine:
2604 // (ptrmask (ptrmask p, A), B)
2605 // -> (ptrmask p, (and A, B))
2606 if (match(II->getArgOperand(0),
2608 m_Value(InnerMask))))) {
2609 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2610 "Mask types must match");
2611 // TODO: If InnerMask == Op1, we could copy attributes from inner
2612 // callsite -> outer callsite.
2613 Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
2614 replaceOperand(CI, 0, InnerPtr);
2615 replaceOperand(CI, 1, NewMask);
2616 Changed = true;
2617 }
2618
2619 // See if we can deduce non-null.
2620 if (!CI.hasRetAttr(Attribute::NonNull) &&
2621 (Known.isNonZero() ||
2622 isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
2623 CI.addRetAttr(Attribute::NonNull);
2624 Changed = true;
2625 }
2626
2627 unsigned NewAlignmentLog =
2629 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
2630 // Known bits will capture any alignment information associated with
2631 // the pointer argument.
2632 if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
2634 CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
2635 Changed = true;
2636 }
2637 if (Changed)
2638 return &CI;
2639 break;
2640 }
2641 case Intrinsic::uadd_with_overflow:
2642 case Intrinsic::sadd_with_overflow: {
2643 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2644 return I;
2645
2646 // Given 2 constant operands whose sum does not overflow:
2647 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2648 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2649 Value *X;
2650 const APInt *C0, *C1;
2651 Value *Arg0 = II->getArgOperand(0);
2652 Value *Arg1 = II->getArgOperand(1);
2653 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2654 bool HasNWAdd = IsSigned
2655 ? match(Arg0, m_NSWAddLike(m_Value(X), m_APInt(C0)))
2656 : match(Arg0, m_NUWAddLike(m_Value(X), m_APInt(C0)));
2657 if (HasNWAdd && match(Arg1, m_APInt(C1))) {
2658 bool Overflow;
2659 APInt NewC =
2660 IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
2661 if (!Overflow)
2662 return replaceInstUsesWith(
2663 *II, Builder.CreateBinaryIntrinsic(
2664 IID, X, ConstantInt::get(Arg1->getType(), NewC)));
2665 }
2666 break;
2667 }
2668
2669 case Intrinsic::umul_with_overflow:
2670 case Intrinsic::smul_with_overflow:
2671 case Intrinsic::usub_with_overflow:
2672 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2673 return I;
2674 break;
2675
2676 case Intrinsic::ssub_with_overflow: {
2677 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2678 return I;
2679
2680 Constant *C;
2681 Value *Arg0 = II->getArgOperand(0);
2682 Value *Arg1 = II->getArgOperand(1);
2683 // Given a constant C that is not the minimum signed value
2684 // for an integer of a given bit width:
2685 //
2686 // ssubo X, C -> saddo X, -C
2687 if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
2688 Value *NegVal = ConstantExpr::getNeg(C);
2689 // Build a saddo call that is equivalent to the discovered
2690 // ssubo call.
2691 return replaceInstUsesWith(
2692 *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2693 Arg0, NegVal));
2694 }
2695
2696 break;
2697 }
2698
2699 case Intrinsic::uadd_sat:
2700 case Intrinsic::sadd_sat:
2701 case Intrinsic::usub_sat:
2702 case Intrinsic::ssub_sat: {
2704 Type *Ty = SI->getType();
2705 Value *Arg0 = SI->getLHS();
2706 Value *Arg1 = SI->getRHS();
2707
2708 // Make use of known overflow information.
2709 OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
2710 Arg0, Arg1, SI);
2711 switch (OR) {
2713 break;
2715 if (SI->isSigned())
2716 return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
2717 else
2718 return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
2720 unsigned BitWidth = Ty->getScalarSizeInBits();
2721 APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
2722 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
2723 }
2725 unsigned BitWidth = Ty->getScalarSizeInBits();
2726 APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
2727 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
2728 }
2729 }
2730
2731 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2732 // which then simplifies further to:
2733 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2734 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
2735 Constant *C, *C1;
2736 Value *A;
2737 if (IID == Intrinsic::usub_sat &&
2738 match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
2739 match(Arg1, m_ImmConstant(C1))) {
2740 auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
2741 auto *NewSub =
2742 Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
2743 return replaceInstUsesWith(*SI, NewSub);
2744 }
2745
2746 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2747 if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2748 C->isNotMinSignedValue()) {
2749 Value *NegVal = ConstantExpr::getNeg(C);
2750 return replaceInstUsesWith(
2751 *II, Builder.CreateBinaryIntrinsic(
2752 Intrinsic::sadd_sat, Arg0, NegVal));
2753 }
2754
2755 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2756 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2757 // if Val and Val2 have the same sign
2758 if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
2759 Value *X;
2760 const APInt *Val, *Val2;
2761 APInt NewVal;
2762 bool IsUnsigned =
2763 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2764 if (Other->getIntrinsicID() == IID &&
2765 match(Arg1, m_APInt(Val)) &&
2766 match(Other->getArgOperand(0), m_Value(X)) &&
2767 match(Other->getArgOperand(1), m_APInt(Val2))) {
2768 if (IsUnsigned)
2769 NewVal = Val->uadd_sat(*Val2);
2770 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2771 bool Overflow;
2772 NewVal = Val->sadd_ov(*Val2, Overflow);
2773 if (Overflow) {
2774 // Both adds together may add more than SignedMaxValue
2775 // without saturating the final result.
2776 break;
2777 }
2778 } else {
2779 // Cannot fold saturated addition with different signs.
2780 break;
2781 }
2782
2783 return replaceInstUsesWith(
2784 *II, Builder.CreateBinaryIntrinsic(
2785 IID, X, ConstantInt::get(II->getType(), NewVal)));
2786 }
2787 }
2788 break;
2789 }
2790
2791 case Intrinsic::minnum:
2792 case Intrinsic::maxnum:
2793 case Intrinsic::minimum:
2794 case Intrinsic::maximum: {
2795 Value *Arg0 = II->getArgOperand(0);
2796 Value *Arg1 = II->getArgOperand(1);
2797 Value *X, *Y;
2798 if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2799 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2800 // If both operands are negated, invert the call and negate the result:
2801 // min(-X, -Y) --> -(max(X, Y))
2802 // max(-X, -Y) --> -(min(X, Y))
2803 Intrinsic::ID NewIID;
2804 switch (IID) {
2805 case Intrinsic::maxnum:
2806 NewIID = Intrinsic::minnum;
2807 break;
2808 case Intrinsic::minnum:
2809 NewIID = Intrinsic::maxnum;
2810 break;
2811 case Intrinsic::maximum:
2812 NewIID = Intrinsic::minimum;
2813 break;
2814 case Intrinsic::minimum:
2815 NewIID = Intrinsic::maximum;
2816 break;
2817 default:
2818 llvm_unreachable("unexpected intrinsic ID");
2819 }
2820 Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
2821 Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
2822 FNeg->copyIRFlags(II);
2823 return FNeg;
2824 }
2825
2826 // m(m(X, C2), C1) -> m(X, C) where C = m(C1, C2)
2827 const APFloat *C1, *C2;
2828 if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
2829 if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
2830 ((match(M->getArgOperand(0), m_Value(X)) &&
2831 match(M->getArgOperand(1), m_APFloat(C2))) ||
2832 (match(M->getArgOperand(1), m_Value(X)) &&
2833 match(M->getArgOperand(0), m_APFloat(C2))))) {
2834 APFloat Res(0.0);
2835 switch (IID) {
2836 case Intrinsic::maxnum:
2837 Res = maxnum(*C1, *C2);
2838 break;
2839 case Intrinsic::minnum:
2840 Res = minnum(*C1, *C2);
2841 break;
2842 case Intrinsic::maximum:
2843 Res = maximum(*C1, *C2);
2844 break;
2845 case Intrinsic::minimum:
2846 Res = minimum(*C1, *C2);
2847 break;
2848 default:
2849 llvm_unreachable("unexpected intrinsic ID");
2850 }
2851 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2852 // was a simplification (so Arg0 and its original flags could
2853 // propagate?)
2854 Value *V = Builder.CreateBinaryIntrinsic(
2855 IID, X, ConstantFP::get(Arg0->getType(), Res),
2857 return replaceInstUsesWith(*II, V);
2858 }
2859 }
2860
2861 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2862 if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
2863 match(Arg1, m_OneUse(m_FPExt(m_Value(Y)))) &&
2864 X->getType() == Y->getType()) {
2865 Value *NewCall =
2866 Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
2867 return new FPExtInst(NewCall, II->getType());
2868 }
2869
2870 // max X, -X --> fabs X
2871 // min X, -X --> -(fabs X)
2872 // TODO: Remove the one-use limitation? That is obviously better for max,
2873 // which is why we don't check for one-use there. However,
2874 // it would be an extra instruction for min (fnabs), but
2875 // that is still likely better for analysis and codegen.
2876 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
2877 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Specific(X)))
2878 return Op0->hasOneUse() ||
2879 (IID != Intrinsic::minimum && IID != Intrinsic::minnum);
2880 return false;
2881 };
2882
2883 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
2884 Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
2885 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum)
2886 R = Builder.CreateFNegFMF(R, II);
2887 return replaceInstUsesWith(*II, R);
2888 }
2889
2890 break;
2891 }
2892 case Intrinsic::matrix_multiply: {
2893 // Optimize negation in matrix multiplication.
2894
2895 // -A * -B -> A * B
2896 Value *A, *B;
2897 if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
2898 match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
2899 replaceOperand(*II, 0, A);
2900 replaceOperand(*II, 1, B);
2901 return II;
2902 }
2903
2904 Value *Op0 = II->getOperand(0);
2905 Value *Op1 = II->getOperand(1);
2906 Value *OpNotNeg, *NegatedOp;
2907 unsigned NegatedOpArg, OtherOpArg;
2908 if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
2909 NegatedOp = Op0;
2910 NegatedOpArg = 0;
2911 OtherOpArg = 1;
2912 } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
2913 NegatedOp = Op1;
2914 NegatedOpArg = 1;
2915 OtherOpArg = 0;
2916 } else
2917 // Multiplication doesn't have a negated operand.
2918 break;
2919
2920 // Only optimize if the negated operand has only one use.
2921 if (!NegatedOp->hasOneUse())
2922 break;
2923
2924 Value *OtherOp = II->getOperand(OtherOpArg);
2925 VectorType *RetTy = cast<VectorType>(II->getType());
2926 VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
2927 VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
2928 ElementCount NegatedCount = NegatedOpTy->getElementCount();
2929 ElementCount OtherCount = OtherOpTy->getElementCount();
2930 ElementCount RetCount = RetTy->getElementCount();
2931 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
2932 if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
2933 ElementCount::isKnownLT(OtherCount, RetCount)) {
2934 Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
2935 replaceOperand(*II, NegatedOpArg, OpNotNeg);
2936 replaceOperand(*II, OtherOpArg, InverseOtherOp);
2937 return II;
2938 }
2939 // (-A) * B -> -(A * B), if it is cheaper to negate the result
2940 if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
2941 SmallVector<Value *, 5> NewArgs(II->args());
2942 NewArgs[NegatedOpArg] = OpNotNeg;
2943 Instruction *NewMul =
2944 Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
2945 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
2946 }
2947 break;
2948 }
2949 case Intrinsic::fmuladd: {
2950 // Try to simplify the underlying FMul.
2951 if (Value *V =
2952 simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
2953 II->getFastMathFlags(), SQ.getWithInstruction(II)))
2954 return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
2955 II->getFastMathFlags());
2956
2957 [[fallthrough]];
2958 }
2959 case Intrinsic::fma: {
2960 // fma fneg(x), fneg(y), z -> fma x, y, z
2961 Value *Src0 = II->getArgOperand(0);
2962 Value *Src1 = II->getArgOperand(1);
2963 Value *Src2 = II->getArgOperand(2);
2964 Value *X, *Y;
2965 if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
2966 replaceOperand(*II, 0, X);
2967 replaceOperand(*II, 1, Y);
2968 return II;
2969 }
2970
2971 // fma fabs(x), fabs(x), z -> fma x, x, z
2972 if (match(Src0, m_FAbs(m_Value(X))) &&
2973 match(Src1, m_FAbs(m_Specific(X)))) {
2974 replaceOperand(*II, 0, X);
2975 replaceOperand(*II, 1, X);
2976 return II;
2977 }
2978
2979 // Try to simplify the underlying FMul. We can only apply simplifications
2980 // that do not require rounding.
2981 if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
2982 SQ.getWithInstruction(II)))
2983 return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
2984
2985 // fma x, y, 0 -> fmul x, y
2986 // This is always valid for -0.0, but requires nsz for +0.0 as
2987 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
2988 if (match(Src2, m_NegZeroFP()) ||
2989 (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
2990 return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
2991
2992 // fma x, -1.0, y -> fsub y, x
2993 if (match(Src1, m_SpecificFP(-1.0)))
2994 return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
2995
2996 break;
2997 }
2998 case Intrinsic::copysign: {
2999 Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
3000 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
3001 Sign, getSimplifyQuery().getWithInstruction(II))) {
3002 if (*KnownSignBit) {
3003 // If we know that the sign argument is negative, reduce to FNABS:
3004 // copysign Mag, -Sign --> fneg (fabs Mag)
3005 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
3006 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
3007 }
3008
3009 // If we know that the sign argument is positive, reduce to FABS:
3010 // copysign Mag, +Sign --> fabs Mag
3011 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
3012 return replaceInstUsesWith(*II, Fabs);
3013 }
3014
3015 // Propagate sign argument through nested calls:
3016 // copysign Mag, (copysign ?, X) --> copysign Mag, X
3017 Value *X;
3019 Value *CopySign =
3020 Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
3021 return replaceInstUsesWith(*II, CopySign);
3022 }
3023
3024 // Clear sign-bit of constant magnitude:
3025 // copysign -MagC, X --> copysign MagC, X
3026 // TODO: Support constant folding for fabs
3027 const APFloat *MagC;
3028 if (match(Mag, m_APFloat(MagC)) && MagC->isNegative()) {
3029 APFloat PosMagC = *MagC;
3030 PosMagC.clearSign();
3031 return replaceOperand(*II, 0, ConstantFP::get(Mag->getType(), PosMagC));
3032 }
3033
3034 // Peek through changes of magnitude's sign-bit. This call rewrites those:
3035 // copysign (fabs X), Sign --> copysign X, Sign
3036 // copysign (fneg X), Sign --> copysign X, Sign
3037 if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
3038 return replaceOperand(*II, 0, X);
3039
3040 break;
3041 }
3042 case Intrinsic::fabs: {
3043 Value *Cond, *TVal, *FVal;
3044 Value *Arg = II->getArgOperand(0);
3045 Value *X;
3046 // fabs (-X) --> fabs (X)
3047 if (match(Arg, m_FNeg(m_Value(X)))) {
3048 CallInst *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
3049 return replaceInstUsesWith(CI, Fabs);
3050 }
3051
3052 if (match(Arg, m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
3053 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
3054 if (Arg->hasOneUse() ? (isa<Constant>(TVal) || isa<Constant>(FVal))
3055 : (isa<Constant>(TVal) && isa<Constant>(FVal))) {
3056 CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
3057 CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
3058 SelectInst *SI = SelectInst::Create(Cond, AbsT, AbsF);
3059 FastMathFlags FMF1 = II->getFastMathFlags();
3060 FastMathFlags FMF2 = cast<SelectInst>(Arg)->getFastMathFlags();
3061 FMF2.setNoSignedZeros(false);
3062 SI->setFastMathFlags(FMF1 | FMF2);
3063 return SI;
3064 }
3065 // fabs (select Cond, -FVal, FVal) --> fabs FVal
3066 if (match(TVal, m_FNeg(m_Specific(FVal))))
3067 return replaceOperand(*II, 0, FVal);
3068 // fabs (select Cond, TVal, -TVal) --> fabs TVal
3069 if (match(FVal, m_FNeg(m_Specific(TVal))))
3070 return replaceOperand(*II, 0, TVal);
3071 }
3072
3073 Value *Magnitude, *Sign;
3074 if (match(II->getArgOperand(0),
3075 m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
3076 // fabs (copysign x, y) -> (fabs x)
3077 CallInst *AbsSign =
3078 Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Magnitude, II);
3079 return replaceInstUsesWith(*II, AbsSign);
3080 }
3081
3082 [[fallthrough]];
3083 }
3084 case Intrinsic::ceil:
3085 case Intrinsic::floor:
3086 case Intrinsic::round:
3087 case Intrinsic::roundeven:
3088 case Intrinsic::nearbyint:
3089 case Intrinsic::rint:
3090 case Intrinsic::trunc: {
3091 Value *ExtSrc;
3092 if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
3093 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
3094 Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
3095 return new FPExtInst(NarrowII, II->getType());
3096 }
3097 break;
3098 }
3099 case Intrinsic::cos:
3100 case Intrinsic::amdgcn_cos:
3101 case Intrinsic::cosh: {
3102 Value *X, *Sign;
3103 Value *Src = II->getArgOperand(0);
3104 if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X))) ||
3105 match(Src, m_CopySign(m_Value(X), m_Value(Sign)))) {
3106 // f(-x) --> f(x)
3107 // f(fabs(x)) --> f(x)
3108 // f(copysign(x, y)) --> f(x)
3109 // for f in {cos, cosh}
3110 return replaceOperand(*II, 0, X);
3111 }
3112 break;
3113 }
3114 case Intrinsic::sin:
3115 case Intrinsic::amdgcn_sin:
3116 case Intrinsic::sinh:
3117 case Intrinsic::tan:
3118 case Intrinsic::tanh: {
3119 Value *X;
3120 if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
3121 // f(-x) --> -f(x)
3122 // for f in {sin, sinh, tan, tanh}
3123 Value *NewFunc = Builder.CreateUnaryIntrinsic(IID, X, II);
3124 return UnaryOperator::CreateFNegFMF(NewFunc, II);
3125 }
3126 break;
3127 }
3128 case Intrinsic::ldexp: {
3129 // ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
3130 //
3131 // The danger is if the first ldexp would overflow to infinity or underflow
3132 // to zero, but the combined exponent avoids it. We ignore this with
3133 // reassoc.
3134 //
3135 // It's also safe to fold if we know both exponents are >= 0 or <= 0 since
3136 // it would just double down on the overflow/underflow which would occur
3137 // anyway.
3138 //
3139 // TODO: Could do better if we had range tracking for the input value
3140 // exponent. Also could broaden sign check to cover == 0 case.
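// (Concretely, for f32: ldexp(1.0, 200) overflows to +inf, so
// ldexp(ldexp(1.0, 200), -200) is +inf, while the folded ldexp(1.0, 0) would
// be 1.0; hence the reassoc or same-sign requirement.)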
3141 Value *Src = II->getArgOperand(0);
3142 Value *Exp = II->getArgOperand(1);
3143
3144 uint64_t ConstExp;
3145 if (match(Exp, m_ConstantInt(ConstExp))) {
3146 // ldexp(x, K) -> fmul x, 2^K
3147 const fltSemantics &FPTy =
3148 Src->getType()->getScalarType()->getFltSemantics();
3149
3150 APFloat Scaled = scalbn(APFloat::getOne(FPTy), static_cast<int>(ConstExp),
3152 if (!Scaled.isZero() && !Scaled.isInfinity()) {
3153 // Skip overflow and underflow cases.
3154 Constant *FPConst = ConstantFP::get(Src->getType(), Scaled);
3155 return BinaryOperator::CreateFMulFMF(Src, FPConst, II);
3156 }
3157 }
3158
3159 Value *InnerSrc;
3160 Value *InnerExp;
3162 m_Value(InnerSrc), m_Value(InnerExp)))) &&
3163 Exp->getType() == InnerExp->getType()) {
3164 FastMathFlags FMF = II->getFastMathFlags();
3165 FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
3166
3167 if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
3168 signBitMustBeTheSame(Exp, InnerExp, SQ.getWithInstruction(II))) {
3169 // TODO: Add nsw/nuw probably safe if integer type exceeds exponent
3170 // width.
3171 Value *NewExp = Builder.CreateAdd(InnerExp, Exp);
3172 II->setArgOperand(1, NewExp);
3173 II->setFastMathFlags(InnerFlags); // Or the inner flags.
3174 return replaceOperand(*II, 0, InnerSrc);
3175 }
3176 }
3177
3178 // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3179 // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
3180 Value *ExtSrc;
3181 if (match(Exp, m_ZExt(m_Value(ExtSrc))) &&
3182 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3183 Value *Select =
3184 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0),
3185 ConstantFP::get(II->getType(), 1.0));
3186 return BinaryOperator::CreateFMulFMF(Src, Select, II);
3187 }
3188 if (match(Exp, m_SExt(m_Value(ExtSrc))) &&
3189 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3190 Value *Select =
3191 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5),
3192 ConstantFP::get(II->getType(), 1.0));
3193 return BinaryOperator::CreateFMulFMF(Src, Select, II);
3194 }
3195
3196 // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3197 // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3198 //
3199 // TODO: If we cared, should insert a canonicalize for x
3200 Value *SelectCond, *SelectLHS, *SelectRHS;
3201 if (match(II->getArgOperand(1),
3202 m_OneUse(m_Select(m_Value(SelectCond), m_Value(SelectLHS),
3203 m_Value(SelectRHS))))) {
3204 Value *NewLdexp = nullptr;
3205 Value *Select = nullptr;
3206 if (match(SelectRHS, m_ZeroInt())) {
3207 NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
3208 Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
3209 } else if (match(SelectLHS, m_ZeroInt())) {
3210 NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
3211 Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
3212 }
3213
3214 if (NewLdexp) {
3215 Select->takeName(II);
3216 return replaceInstUsesWith(*II, Select);
3217 }
3218 }
3219
3220 break;
3221 }
3222 case Intrinsic::ptrauth_auth:
3223 case Intrinsic::ptrauth_resign: {
3224 // We don't support this optimization on intrinsic calls with deactivation
3225 // symbols, which are represented using operand bundles.
3226 if (II->hasOperandBundles())
3227 break;
3228
3229 // (sign|resign) + (auth|resign) can be folded by omitting the middle
3230 // sign+auth component if the key and discriminator match.
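// For illustration, with matching key K and discriminator D (arbitrary
// names):
//   resign(sign(p, K, D), K, D, K2, D2) --> sign(p, K2, D2)
//   auth(sign(p, K, D), K, D)           --> p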
3231 bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3232 Value *Ptr = II->getArgOperand(0);
3233 Value *Key = II->getArgOperand(1);
3234 Value *Disc = II->getArgOperand(2);
3235
3236 // AuthKey will be the key we need to end up authenticating against in
3237 // whatever we replace this sequence with.
3238 Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
3239 if (const auto *CI = dyn_cast<CallBase>(Ptr)) {
3240 // We don't support this optimization on intrinsic calls with deactivation
3241 // symbols, which are represented using operand bundles.
3242 if (CI->hasOperandBundles())
3243 break;
3244
3245 BasePtr = CI->getArgOperand(0);
3246 if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3247 if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
3248 break;
3249 } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3250 if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
3251 break;
3252 AuthKey = CI->getArgOperand(1);
3253 AuthDisc = CI->getArgOperand(2);
3254 } else
3255 break;
3256 } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Ptr)) {
3257 // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3258 // our purposes, so check for that too.
3259 const auto *CPA = dyn_cast<ConstantPtrAuth>(PtrToInt->getOperand(0));
3260 if (!CPA || !CPA->isKnownCompatibleWith(Key, Disc, DL))
3261 break;
3262
3263 // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3264 if (NeedSign && isa<ConstantInt>(II->getArgOperand(4))) {
3265 auto *SignKey = cast<ConstantInt>(II->getArgOperand(3));
3266 auto *SignDisc = cast<ConstantInt>(II->getArgOperand(4));
3267 auto *Null = ConstantPointerNull::get(Builder.getPtrTy());
3268 auto *NewCPA = ConstantPtrAuth::get(CPA->getPointer(), SignKey,
3269 SignDisc, /*AddrDisc=*/Null,
3270 /*DeactivationSymbol=*/Null);
3271 replaceInstUsesWith(
3272 *II, ConstantExpr::getPointerCast(NewCPA, II->getType()));
3273 return eraseInstFromFunction(*II);
3274 }
3275
3276 // auth(ptrauth(p,k,d),k,d) -> p
3277 BasePtr = Builder.CreatePtrToInt(CPA->getPointer(), II->getType());
3278 } else
3279 break;
3280
3281 unsigned NewIntrin;
3282 if (AuthKey && NeedSign) {
3283 // resign(0,1) + resign(1,2) = resign(0, 2)
3284 NewIntrin = Intrinsic::ptrauth_resign;
3285 } else if (AuthKey) {
3286 // resign(0,1) + auth(1) = auth(0)
3287 NewIntrin = Intrinsic::ptrauth_auth;
3288 } else if (NeedSign) {
3289 // sign(0) + resign(0, 1) = sign(1)
3290 NewIntrin = Intrinsic::ptrauth_sign;
3291 } else {
3292 // sign(0) + auth(0) = nop
3293 replaceInstUsesWith(*II, BasePtr);
3294 return eraseInstFromFunction(*II);
3295 }
3296
3297 SmallVector<Value *, 4> CallArgs;
3298 CallArgs.push_back(BasePtr);
3299 if (AuthKey) {
3300 CallArgs.push_back(AuthKey);
3301 CallArgs.push_back(AuthDisc);
3302 }
3303
3304 if (NeedSign) {
3305 CallArgs.push_back(II->getArgOperand(3));
3306 CallArgs.push_back(II->getArgOperand(4));
3307 }
3308
3309 Function *NewFn =
3310 Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
3311 return CallInst::Create(NewFn, CallArgs);
3312 }
3313 case Intrinsic::arm_neon_vtbl1:
3314 case Intrinsic::arm_neon_vtbl2:
3315 case Intrinsic::arm_neon_vtbl3:
3316 case Intrinsic::arm_neon_vtbl4:
3317 case Intrinsic::aarch64_neon_tbl1:
3318 case Intrinsic::aarch64_neon_tbl2:
3319 case Intrinsic::aarch64_neon_tbl3:
3320 case Intrinsic::aarch64_neon_tbl4:
3321 return simplifyNeonTbl(*II, *this, /*IsExtension=*/false);
3322 case Intrinsic::arm_neon_vtbx1:
3323 case Intrinsic::arm_neon_vtbx2:
3324 case Intrinsic::arm_neon_vtbx3:
3325 case Intrinsic::arm_neon_vtbx4:
3326 case Intrinsic::aarch64_neon_tbx1:
3327 case Intrinsic::aarch64_neon_tbx2:
3328 case Intrinsic::aarch64_neon_tbx3:
3329 case Intrinsic::aarch64_neon_tbx4:
3330 return simplifyNeonTbl(*II, *this, /*IsExtension=*/true);
3331
3332 case Intrinsic::arm_neon_vmulls:
3333 case Intrinsic::arm_neon_vmullu:
3334 case Intrinsic::aarch64_neon_smull:
3335 case Intrinsic::aarch64_neon_umull: {
3336 Value *Arg0 = II->getArgOperand(0);
3337 Value *Arg1 = II->getArgOperand(1);
3338
3339 // Handle mul by zero first:
3340 if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
3341 return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3342 }
3343
3344 // Check for constant LHS & RHS - in this case we just simplify.
3345 bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
3346 IID == Intrinsic::aarch64_neon_umull);
3347 VectorType *NewVT = cast<VectorType>(II->getType());
3348 if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3349 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3350 Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
3351 Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
3352 return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
3353 }
3354
3355 // Couldn't simplify - canonicalize constant to the RHS.
3356 std::swap(Arg0, Arg1);
3357 }
3358
3359 // Handle mul by one:
3360 if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3361 if (ConstantInt *Splat =
3362 dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3363 if (Splat->isOne())
3364 return CastInst::CreateIntegerCast(Arg0, II->getType(),
3365 /*isSigned=*/!Zext);
3366
3367 break;
3368 }
3369 case Intrinsic::arm_neon_aesd:
3370 case Intrinsic::arm_neon_aese:
3371 case Intrinsic::aarch64_crypto_aesd:
3372 case Intrinsic::aarch64_crypto_aese:
3373 case Intrinsic::aarch64_sve_aesd:
3374 case Intrinsic::aarch64_sve_aese: {
3375 Value *DataArg = II->getArgOperand(0);
3376 Value *KeyArg = II->getArgOperand(1);
3377
3378 // Accept zero on either operand.
3379 if (!match(KeyArg, m_ZeroInt()))
3380 std::swap(KeyArg, DataArg);
3381
3382 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
3383 Value *Data, *Key;
3384 if (match(KeyArg, m_ZeroInt()) &&
3385 match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3386 replaceOperand(*II, 0, Data);
3387 replaceOperand(*II, 1, Key);
3388 return II;
3389 }
3390 break;
3391 }
3392 case Intrinsic::arm_neon_vshifts:
3393 case Intrinsic::arm_neon_vshiftu:
3394 case Intrinsic::aarch64_neon_sshl:
3395 case Intrinsic::aarch64_neon_ushl:
3396 return foldNeonShift(II, *this);
3397 case Intrinsic::hexagon_V6_vandvrt:
3398 case Intrinsic::hexagon_V6_vandvrt_128B: {
3399 // Simplify Q -> V -> Q conversion.
3400 if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3401 Intrinsic::ID ID0 = Op0->getIntrinsicID();
3402 if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3403 ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3404 break;
3405 Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
3406 uint64_t Bytes1 = computeKnownBits(Bytes, Op0).One.getZExtValue();
3407 uint64_t Mask1 = computeKnownBits(Mask, II).One.getZExtValue();
3408 // Check if every byte has common bits in Bytes and Mask.
3409 uint64_t C = Bytes1 & Mask1;
3410 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
3411 return replaceInstUsesWith(*II, Op0->getArgOperand(0));
3412 }
3413 break;
3414 }
3415 case Intrinsic::stackrestore: {
3416 enum class ClassifyResult {
3417 None,
3418 Alloca,
3419 StackRestore,
3420 CallWithSideEffects,
3421 };
3422 auto Classify = [](const Instruction *I) {
3423 if (isa<AllocaInst>(I))
3424 return ClassifyResult::Alloca;
3425
3426 if (auto *CI = dyn_cast<CallInst>(I)) {
3427 if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
3428 if (II->getIntrinsicID() == Intrinsic::stackrestore)
3429 return ClassifyResult::StackRestore;
3430
3431 if (II->mayHaveSideEffects())
3432 return ClassifyResult::CallWithSideEffects;
3433 } else {
3434 // Consider all non-intrinsic calls to be side effects
3435 return ClassifyResult::CallWithSideEffects;
3436 }
3437 }
3438
3439 return ClassifyResult::None;
3440 };
3441
3442 // If the stacksave and the stackrestore are in the same BB, and there is
3443 // no intervening call, alloca, or stackrestore of a different stacksave,
3444 // remove the restore. This can happen when variable allocas are DCE'd.
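// Illustrative shape of the removable pattern (names are arbitrary):
//   %sp = call ptr @llvm.stacksave()
//   ... no allocas, calls with side effects, or foreign stackrestores ...
//   call void @llvm.stackrestore(ptr %sp)   ; can be erased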
3445 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3446 if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3447 SS->getParent() == II->getParent()) {
3448 BasicBlock::iterator BI(SS);
3449 bool CannotRemove = false;
3450 for (++BI; &*BI != II; ++BI) {
3451 switch (Classify(&*BI)) {
3452 case ClassifyResult::None:
3453 // So far so good, look at next instructions.
3454 break;
3455
3456 case ClassifyResult::StackRestore:
3457 // If we found an intervening stackrestore for a different
3458 // stacksave, we can't remove the stackrestore. Otherwise, continue.
3459 if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
3460 CannotRemove = true;
3461 break;
3462
3463 case ClassifyResult::Alloca:
3464 case ClassifyResult::CallWithSideEffects:
3465 // If we found an alloca, a non-intrinsic call, or an intrinsic
3466 // call with side effects, we can't remove the stackrestore.
3467 CannotRemove = true;
3468 break;
3469 }
3470 if (CannotRemove)
3471 break;
3472 }
3473
3474 if (!CannotRemove)
3475 return eraseInstFromFunction(CI);
3476 }
3477 }
3478
3479 // Scan down this block to see if there is another stack restore in the
3480 // same block without an intervening call/alloca.
3481 BasicBlock::iterator BI(II);
3482 Instruction *TI = II->getParent()->getTerminator();
3483 bool CannotRemove = false;
3484 for (++BI; &*BI != TI; ++BI) {
3485 switch (Classify(&*BI)) {
3486 case ClassifyResult::None:
3487 // So far so good, look at next instructions.
3488 break;
3489
3490 case ClassifyResult::StackRestore:
3491 // If there is a stackrestore below this one, remove this one.
3492 return eraseInstFromFunction(CI);
3493
3494 case ClassifyResult::Alloca:
3495 case ClassifyResult::CallWithSideEffects:
3496 // If we found an alloca, a non-intrinsic call, or an intrinsic call
3497 // with side effects (such as llvm.stacksave and llvm.read_register),
3498 // we can't remove the stack restore.
3499 CannotRemove = true;
3500 break;
3501 }
3502 if (CannotRemove)
3503 break;
3504 }
3505
3506 // If the stack restore is in a return, resume, or unwind block and if there
3507 // are no allocas or calls between the restore and the return, nuke the
3508 // restore.
3509 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3510 return eraseInstFromFunction(CI);
3511 break;
3512 }
3513 case Intrinsic::lifetime_end:
3514 // Asan needs to poison memory to detect invalid access which is possible
3515 // even for an empty lifetime range.
3516 if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3517 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
3518 II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
3519 break;
3520
3521 if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
3522 return I.getIntrinsicID() == Intrinsic::lifetime_start;
3523 }))
3524 return nullptr;
3525 break;
3526 case Intrinsic::assume: {
3527 Value *IIOperand = II->getArgOperand(0);
3528 SmallVector<OperandBundleDef, 4> OpBundles;
3529 II->getOperandBundlesAsDefs(OpBundles);
3530
3531 /// This will remove the boolean Condition from the assume given as
3532 /// argument and remove the assume if it becomes useless.
3533 /// It always returns nullptr so it can be used as a return value.
3534 auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
3535 assert(isa<AssumeInst>(Assume));
3536 if (isAssumeWithEmptyBundle(*cast<AssumeInst>(II)))
3537 return eraseInstFromFunction(CI);
3538 replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
3539 return nullptr;
3540 };
3541 // Remove an assume if it is followed by an identical assume.
3542 // TODO: Do we need this? Unless there are conflicting assumptions, the
3543 // computeKnownBits(IIOperand) below here eliminates redundant assumes.
3544 Instruction *Next = II->getNextNode();
3545 if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
3546 return RemoveConditionFromAssume(Next);
3547
3548 // Canonicalize assume(a && b) -> assume(a); assume(b);
3549 // Note: New assumption intrinsics created here are registered by
3550 // the InstCombineIRInserter object.
3551 FunctionType *AssumeIntrinsicTy = II->getFunctionType();
3552 Value *AssumeIntrinsic = II->getCalledOperand();
3553 Value *A, *B;
3554 if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
3555 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
3556 II->getName());
3557 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
3558 return eraseInstFromFunction(*II);
3559 }
3560 // assume(!(a || b)) -> assume(!a); assume(!b);
3561 if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
3562 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
3563 Builder.CreateNot(A), OpBundles, II->getName());
3564 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
3565 Builder.CreateNot(B), II->getName());
3566 return eraseInstFromFunction(*II);
3567 }
3568
3569 // assume( (load addr) != null ) -> add 'nonnull' metadata to load
3570 // (if assume is valid at the load)
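// For illustration (arbitrary names):
//   %p = load ptr, ptr %addr
//   %c = icmp ne ptr %p, null
//   call void @llvm.assume(i1 %c)
// becomes a load tagged with !nonnull and !noundef metadata, and the assume
// condition is replaced with true.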
3571 Instruction *LHS;
3572 if (match(IIOperand, m_SpecificICmp(ICmpInst::ICMP_NE, m_Instruction(LHS),
3573 m_Zero())) &&
3574 LHS->getOpcode() == Instruction::Load &&
3575 LHS->getType()->isPointerTy() &&
3576 isValidAssumeForContext(II, LHS, &DT)) {
3577 MDNode *MD = MDNode::get(II->getContext(), {});
3578 LHS->setMetadata(LLVMContext::MD_nonnull, MD);
3579 LHS->setMetadata(LLVMContext::MD_noundef, MD);
3580 return RemoveConditionFromAssume(II);
3581
3582 // TODO: apply nonnull return attributes to calls and invokes
3583 // TODO: apply range metadata for range check patterns?
3584 }
3585
3586 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3587 OperandBundleUse OBU = II->getOperandBundleAt(Idx);
3588
3589 // Separate storage assumptions apply to the underlying allocations, not
3590 // any particular pointer within them. When evaluating the hints for AA
3591 // purposes we getUnderlyingObject them; by precomputing the answers here
3592 // we can avoid having to do so repeatedly there.
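// For illustration: a "separate_storage" hint on (getelementptr inbounds i8,
// ptr %base, i64 16) is rewritten to refer to %base directly.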
3593 if (OBU.getTagName() == "separate_storage") {
3594 assert(OBU.Inputs.size() == 2);
3595 auto MaybeSimplifyHint = [&](const Use &U) {
3596 Value *Hint = U.get();
3597 // Not having a limit is safe because InstCombine removes unreachable
3598 // code.
3599 Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
3600 if (Hint != UnderlyingObject)
3601 replaceUse(const_cast<Use &>(U), UnderlyingObject);
3602 };
3603 MaybeSimplifyHint(OBU.Inputs[0]);
3604 MaybeSimplifyHint(OBU.Inputs[1]);
3605 }
3606
3607 // Try to remove redundant alignment assumptions.
3608 if (OBU.getTagName() == "align" && OBU.Inputs.size() == 2) {
3610 *cast<AssumeInst>(II), II->arg_size() + Idx);
3611 if (!RK || RK.AttrKind != Attribute::Alignment ||
3612 !isPowerOf2_64(RK.ArgValue))
3613 continue;
3614
3615 // Remove align 1 bundles; they don't add any useful information.
3616 if (RK.ArgValue == 1)
3618
3619 // Don't try to remove align assumptions for pointers derived from
3620 // arguments. We might lose information if the function gets inlined and
3621 // the align argument attribute disappears.
3622 Value *UO = getUnderlyingObject(RK.WasOn);
3623 if (!UO || isa<Argument>(UO))
3624 continue;
3625
3626 // Compute known bits for the pointer, passing nullptr as context to
3627 // avoid computeKnownBits using the assumption we are about to remove
3628 // for reasoning.
3629 KnownBits Known = computeKnownBits(RK.WasOn, /*CtxI=*/nullptr);
3630 unsigned TZ = std::min(Known.countMinTrailingZeros(),
3631 Value::MaxAlignmentExponent);
3632 if ((1ULL << TZ) < RK.ArgValue)
3633 continue;
3635 }
3636 }
3637
3638 // Convert nonnull assume like:
3639 // %A = icmp ne i32* %PTR, null
3640 // call void @llvm.assume(i1 %A)
3641 // into
3642 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3644 match(IIOperand,
3646 A->getType()->isPointerTy()) {
3647 if (auto *Replacement = buildAssumeFromKnowledge(
3648 {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
3649
3650 Replacement->insertBefore(Next->getIterator());
3651 AC.registerAssumption(Replacement);
3652 return RemoveConditionFromAssume(II);
3653 }
3654 }
3655
3656 // Convert alignment assume like:
3657 // %B = ptrtoint i32* %A to i64
3658 // %C = and i64 %B, Constant
3659 // %D = icmp eq i64 %C, 0
3660 // call void @llvm.assume(i1 %D)
3661 // into
3662 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
3663 uint64_t AlignMask = 1;
3664 if (EnableKnowledgeRetention &&
3665 (match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
3666 match(IIOperand,
3667 m_SpecificICmp(ICmpInst::ICMP_EQ,
3668 m_And(m_Value(A), m_ConstantInt(AlignMask)),
3669 m_Zero())))) {
3670 if (isPowerOf2_64(AlignMask + 1)) {
3671 uint64_t Offset = 0;
3672 match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
3673 if (match(A, m_PtrToIntOrAddr(m_Value(A)))) {
3674 /// Note: this doesn't preserve the offset information but merges
3675 /// offset and alignment.
3676 /// TODO: we can generate a GEP instead of merging the alignment with
3677 /// the offset.
3678 RetainedKnowledge RK{Attribute::Alignment,
3679 (unsigned)MinAlign(Offset, AlignMask + 1), A};
3680 if (auto *Replacement =
3681 buildAssumeFromKnowledge(RK, II, &AC, &DT)) {
3682
3683 Replacement->insertAfter(II->getIterator());
3684 AC.registerAssumption(Replacement);
3685 }
3686 return RemoveConditionFromAssume(II);
3687 }
3688 }
3689 }
3690
3691 /// Canonicalize Knowledge in operand bundles.
3692 if (EnableKnowledgeRetention && II->hasOperandBundles()) {
3693 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3694 auto &BOI = II->bundle_op_info_begin()[Idx];
3695 RetainedKnowledge RK =
3696 llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI);
3697 if (BOI.End - BOI.Begin > 2)
3698 continue; // Prevent reducing knowledge in an align with offset since
3699 // extracting a RetainedKnowledge from them loses offset
3700 // information
3701 RetainedKnowledge CanonRK =
3702 llvm::simplifyRetainedKnowledge(cast<AssumeInst>(II), RK,
3703 &getAssumptionCache(),
3704 &getDominatorTree());
3705 if (CanonRK == RK)
3706 continue;
3707 if (!CanonRK) {
3708 if (BOI.End - BOI.Begin > 0) {
3709 Worklist.pushValue(II->op_begin()[BOI.Begin]);
3710 Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
3711 }
3712 continue;
3713 }
3714 assert(RK.AttrKind == CanonRK.AttrKind);
3715 if (BOI.End - BOI.Begin > 0)
3716 II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
3717 if (BOI.End - BOI.Begin > 1)
3718 II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
3719 Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
3720 if (RK.WasOn)
3721 Worklist.pushValue(RK.WasOn);
3722 return II;
3723 }
3724 }
3725
3726 // If there is a dominating assume with the same condition as this one,
3727 // then this one is redundant, and should be removed.
3728 KnownBits Known(1);
3729 computeKnownBits(IIOperand, Known, II);
3730 if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast<AssumeInst>(*II)))
3731 return eraseInstFromFunction(*II);
3732
3733 // assume(false) is unreachable.
3734 if (match(IIOperand, m_CombineOr(m_Zero(), m_Undef()))) {
3735 CreateNonTerminatorUnreachable(II);
3736 return eraseInstFromFunction(*II);
3737 }
3738
3739 // Update the cache of affected values for this assumption (we might be
3740 // here because we just simplified the condition).
3741 AC.updateAffectedValues(cast<AssumeInst>(II));
3742 break;
3743 }
3744 case Intrinsic::experimental_guard: {
3745 // Is this guard followed by another guard? We scan forward over a small
3746 // fixed window of instructions to handle common cases with conditions
3747 // computed between guards.
3748 Instruction *NextInst = II->getNextNode();
3749 for (unsigned i = 0; i < GuardWideningWindow; i++) {
3750 // Note: Using context-free form to avoid compile time blow up
3751 if (!isSafeToSpeculativelyExecute(NextInst))
3752 break;
3753 NextInst = NextInst->getNextNode();
3754 }
3755 Value *NextCond = nullptr;
3756 if (match(NextInst,
3757 m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
3758 Value *CurrCond = II->getArgOperand(0);
3759
3760 // Remove a guard that is immediately preceded by an identical guard.
3761 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
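// For illustration (arbitrary names):
//   call void (i1, ...) @llvm.experimental.guard(i1 %a) [ "deopt"() ]
//   call void (i1, ...) @llvm.experimental.guard(i1 %b) [ "deopt"() ]
// becomes a single guard on (%a & %b); if both conditions are identical, the
// second guard is simply erased.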
3762 if (CurrCond != NextCond) {
3763 Instruction *MoveI = II->getNextNode();
3764 while (MoveI != NextInst) {
3765 auto *Temp = MoveI;
3766 MoveI = MoveI->getNextNode();
3767 Temp->moveBefore(II->getIterator());
3768 }
3769 replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
3770 }
3771 eraseInstFromFunction(*NextInst);
3772 return II;
3773 }
3774 break;
3775 }
3776 case Intrinsic::vector_insert: {
3777 Value *Vec = II->getArgOperand(0);
3778 Value *SubVec = II->getArgOperand(1);
3779 Value *Idx = II->getArgOperand(2);
3780 auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
3781 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
3782 auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
3783
3784 // Only canonicalize if the destination vector, Vec, and SubVec are all
3785 // fixed vectors.
3786 if (DstTy && VecTy && SubVecTy) {
3787 unsigned DstNumElts = DstTy->getNumElements();
3788 unsigned VecNumElts = VecTy->getNumElements();
3789 unsigned SubVecNumElts = SubVecTy->getNumElements();
3790 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3791
3792 // An insert that entirely overwrites Vec with SubVec is a nop.
3793 if (VecNumElts == SubVecNumElts)
3794 return replaceInstUsesWith(CI, SubVec);
3795
3796 // Widen SubVec into a vector of the same width as Vec, since
3797 // shufflevector requires the two input vectors to be the same width.
3798 // Elements beyond the bounds of SubVec within the widened vector are
3799 // undefined.
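// For illustration: inserting <2 x i32> %sub into <4 x i32> %vec at index 2
// widens %sub with poison lanes and then shuffles (%vec, %widened) with mask
// <0, 1, 4, 5>, so lanes 2..3 come from the widened subvector.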
3800 SmallVector<int, 8> WidenMask;
3801 unsigned i;
3802 for (i = 0; i != SubVecNumElts; ++i)
3803 WidenMask.push_back(i);
3804 for (; i != VecNumElts; ++i)
3805 WidenMask.push_back(PoisonMaskElem);
3806
3807 Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
3808
3809 SmallVector<int, 8> Mask;
3810 for (unsigned i = 0; i != IdxN; ++i)
3811 Mask.push_back(i);
3812 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3813 Mask.push_back(i);
3814 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3815 Mask.push_back(i);
3816
3817 Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
3818 return replaceInstUsesWith(CI, Shuffle);
3819 }
3820 break;
3821 }
3822 case Intrinsic::vector_extract: {
3823 Value *Vec = II->getArgOperand(0);
3824 Value *Idx = II->getArgOperand(1);
3825
3826 Type *ReturnType = II->getType();
3827 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3828 // ExtractIdx)
3829 unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
3830 Value *InsertTuple, *InsertIdx, *InsertValue;
3831 if (match(Vec, m_Intrinsic<Intrinsic::vector_insert>(m_Value(InsertTuple),
3832 m_Value(InsertValue),
3833 m_Value(InsertIdx))) &&
3834 InsertValue->getType() == ReturnType) {
3835 unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
3836 // Case where we get the same index right after setting it.
3837 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3838 // InsertValue
3839 if (ExtractIdx == Index)
3840 return replaceInstUsesWith(CI, InsertValue);
3841 // If we are getting a different index than what was set in the
3842 // insert.vector intrinsic, we can just set the input tuple to the one up
3843 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3844 // InsertIndex), ExtractIndex)
3845 // --> extract.vector(InsertTuple, ExtractIndex)
3846 else
3847 return replaceOperand(CI, 0, InsertTuple);
3848 }
3849
3850 auto *DstTy = dyn_cast<VectorType>(ReturnType);
3851 auto *VecTy = dyn_cast<VectorType>(Vec->getType());
3852
3853 if (DstTy && VecTy) {
3854 auto DstEltCnt = DstTy->getElementCount();
3855 auto VecEltCnt = VecTy->getElementCount();
3856 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3857
3858 // Extracting the entirety of Vec is a nop.
3859 if (DstEltCnt == VecTy->getElementCount()) {
3860 replaceInstUsesWith(CI, Vec);
3861 return eraseInstFromFunction(CI);
3862 }
3863
3864 // Only canonicalize to shufflevector if the destination vector and
3865 // Vec are fixed vectors.
3866 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
3867 break;
3868
3869 SmallVector<int, 8> Mask;
3870 for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
3871 Mask.push_back(IdxN + i);
3872
3873 Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
3874 return replaceInstUsesWith(CI, Shuffle);
3875 }
3876 break;
3877 }
3878 case Intrinsic::experimental_vp_reverse: {
3879 Value *X;
3880 Value *Vec = II->getArgOperand(0);
3881 Value *Mask = II->getArgOperand(1);
3882 if (!match(Mask, m_AllOnes()))
3883 break;
3884 Value *EVL = II->getArgOperand(2);
3885 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
3886 // rev(unop rev(X)) --> unop X
3887 if (match(Vec,
3888 m_OneUse(m_UnOp(m_Intrinsic<Intrinsic::experimental_vp_reverse>(
3889 m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
3890 auto *OldUnOp = cast<UnaryOperator>(Vec);
3891 auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
3892 OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
3893 II->getIterator());
3894 return replaceInstUsesWith(CI, NewUnOp);
3895 }
3896 break;
3897 }
3898 case Intrinsic::vector_reduce_or:
3899 case Intrinsic::vector_reduce_and: {
3900 // Canonicalize logical or/and reductions:
3901 // Or reduction for i1 is represented as:
3902 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3903 // %res = cmp ne iReduxWidth %val, 0
3904 // And reduction for i1 is represented as:
3905 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3906 // %res = cmp eq iReduxWidth %val, -1 (i.e. all ones)
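// For illustration: an or-reduction of <8 x i1> %m becomes
//   %val = bitcast <8 x i1> %m to i8
//   %res = icmp ne i8 %val, 0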
3907 Value *Arg = II->getArgOperand(0);
3908 Value *Vect;
3909
3910 if (Value *NewOp =
3911 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3912 replaceUse(II->getOperandUse(0), NewOp);
3913 return II;
3914 }
3915
3916 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3917 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
3918 if (FTy->getElementType() == Builder.getInt1Ty()) {
3919 Value *Res = Builder.CreateBitCast(
3920 Vect, Builder.getIntNTy(FTy->getNumElements()));
3921 if (IID == Intrinsic::vector_reduce_and) {
3922 Res = Builder.CreateICmpEQ(
3923 Res, ConstantInt::getAllOnesValue(Res->getType()));
3924 } else {
3925 assert(IID == Intrinsic::vector_reduce_or &&
3926 "Expected or reduction.");
3927 Res = Builder.CreateIsNotNull(Res);
3928 }
3929 if (Arg != Vect)
3930 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
3931 II->getType());
3932 return replaceInstUsesWith(CI, Res);
3933 }
3934 }
3935 [[fallthrough]];
3936 }
3937 case Intrinsic::vector_reduce_add: {
3938 if (IID == Intrinsic::vector_reduce_add) {
3939 // Convert vector_reduce_add(ZExt(<n x i1>)) to
3940 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3941 // Convert vector_reduce_add(SExt(<n x i1>)) to
3942 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3943 // Convert vector_reduce_add(<n x i1>) to
3944 // Trunc(ctpop(bitcast <n x i1> to in)).
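// For illustration: vector_reduce_add(zext <8 x i1> %m to <8 x i32>) becomes
// zext(ctpop(bitcast <8 x i1> %m to i8)) to i32; the sext form is the same
// with a negation on the result.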
3945 Value *Arg = II->getArgOperand(0);
3946 Value *Vect;
3947
3948 if (Value *NewOp =
3949 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3950 replaceUse(II->getOperandUse(0), NewOp);
3951 return II;
3952 }
3953
3954 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3955 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
3956 if (FTy->getElementType() == Builder.getInt1Ty()) {
3957 Value *V = Builder.CreateBitCast(
3958 Vect, Builder.getIntNTy(FTy->getNumElements()));
3959 Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
3960 if (Res->getType() != II->getType())
3961 Res = Builder.CreateZExtOrTrunc(Res, II->getType());
3962 if (Arg != Vect &&
3963 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
3964 Res = Builder.CreateNeg(Res);
3965 return replaceInstUsesWith(CI, Res);
3966 }
3967 }
3968
3969 // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
3970 if (Value *Splat = getSplatValue(Arg)) {
3971 ElementCount VecToReduceCount =
3972 cast<VectorType>(Arg->getType())->getElementCount();
3973 if (VecToReduceCount.isFixed()) {
3974 unsigned VectorSize = VecToReduceCount.getFixedValue();
3975 return BinaryOperator::CreateMul(
3976 Splat,
3977 ConstantInt::get(Splat->getType(), VectorSize, /*IsSigned=*/false,
3978 /*ImplicitTrunc=*/true));
3979 }
3980 }
3981 }
3982 [[fallthrough]];
3983 }
3984 case Intrinsic::vector_reduce_xor: {
3985 if (IID == Intrinsic::vector_reduce_xor) {
3986 // Exclusive disjunction reduction over the vector with
3987 // (potentially-extended) i1 element type is actually a
3988 // (potentially-extended) arithmetic `add` reduction over the original
3989 // non-extended value:
3990 // vector_reduce_xor(?ext(<n x i1>))
3991 // -->
3992 // ?ext(vector_reduce_add(<n x i1>))
3993 Value *Arg = II->getArgOperand(0);
3994 Value *Vect;
3995
3996 if (Value *NewOp =
3997 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3998 replaceUse(II->getOperandUse(0), NewOp);
3999 return II;
4000 }
4001
4002 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4003 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4004 if (VTy->getElementType() == Builder.getInt1Ty()) {
4005 Value *Res = Builder.CreateAddReduce(Vect);
4006 if (Arg != Vect)
4007 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4008 II->getType());
4009 return replaceInstUsesWith(CI, Res);
4010 }
4011 }
4012 }
4013 [[fallthrough]];
4014 }
4015 case Intrinsic::vector_reduce_mul: {
4016 if (IID == Intrinsic::vector_reduce_mul) {
4017 // Multiplicative reduction over the vector with (potentially-extended)
4018 // i1 element type is actually a (potentially zero-extended)
4019 // logical `and` reduction over the original non-extended value:
4020 // vector_reduce_mul(?ext(<n x i1>))
4021 // -->
4022 // zext(vector_reduce_and(<n x i1>))
4023 Value *Arg = II->getArgOperand(0);
4024 Value *Vect;
4025
4026 if (Value *NewOp =
4027 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4028 replaceUse(II->getOperandUse(0), NewOp);
4029 return II;
4030 }
4031
4032 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4033 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4034 if (VTy->getElementType() == Builder.getInt1Ty()) {
4035 Value *Res = Builder.CreateAndReduce(Vect);
4036 if (Res->getType() != II->getType())
4037 Res = Builder.CreateZExt(Res, II->getType());
4038 return replaceInstUsesWith(CI, Res);
4039 }
4040 }
4041 }
4042 [[fallthrough]];
4043 }
4044 case Intrinsic::vector_reduce_umin:
4045 case Intrinsic::vector_reduce_umax: {
4046 if (IID == Intrinsic::vector_reduce_umin ||
4047 IID == Intrinsic::vector_reduce_umax) {
4048 // UMin/UMax reduction over the vector with (potentially-extended)
4049 // i1 element type is actually a (potentially-extended)
4050 // logical `and`/`or` reduction over the original non-extended value:
4051 // vector_reduce_u{min,max}(?ext(<n x i1>))
4052 // -->
4053 // ?ext(vector_reduce_{and,or}(<n x i1>))
4054 Value *Arg = II->getArgOperand(0);
4055 Value *Vect;
4056
4057 if (Value *NewOp =
4058 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4059 replaceUse(II->getOperandUse(0), NewOp);
4060 return II;
4061 }
4062
4063 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4064 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4065 if (VTy->getElementType() == Builder.getInt1Ty()) {
4066 Value *Res = IID == Intrinsic::vector_reduce_umin
4067 ? Builder.CreateAndReduce(Vect)
4068 : Builder.CreateOrReduce(Vect);
4069 if (Arg != Vect)
4070 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4071 II->getType());
4072 return replaceInstUsesWith(CI, Res);
4073 }
4074 }
4075 }
4076 [[fallthrough]];
4077 }
4078 case Intrinsic::vector_reduce_smin:
4079 case Intrinsic::vector_reduce_smax: {
4080 if (IID == Intrinsic::vector_reduce_smin ||
4081 IID == Intrinsic::vector_reduce_smax) {
4082 // SMin/SMax reduction over the vector with (potentially-extended)
4083 // i1 element type is actually a (potentially-extended)
4084 // logical `and`/`or` reduction over the original non-extended value:
4085 // vector_reduce_s{min,max}(<n x i1>)
4086 // -->
4087 // vector_reduce_{or,and}(<n x i1>)
4088 // and
4089 // vector_reduce_s{min,max}(sext(<n x i1>))
4090 // -->
4091 // sext(vector_reduce_{or,and}(<n x i1>))
4092 // and
4093 // vector_reduce_s{min,max}(zext(<n x i1>))
4094 // -->
4095 // zext(vector_reduce_{and,or}(<n x i1>))
4096 Value *Arg = II->getArgOperand(0);
4097 Value *Vect;
4098
4099 if (Value *NewOp =
4100 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4101 replaceUse(II->getOperandUse(0), NewOp);
4102 return II;
4103 }
4104
4105 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4106 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4107 if (VTy->getElementType() == Builder.getInt1Ty()) {
4108 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
4109 if (Arg != Vect)
4110 ExtOpc = cast<CastInst>(Arg)->getOpcode();
4111 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
4112 (ExtOpc == Instruction::CastOps::ZExt))
4113 ? Builder.CreateAndReduce(Vect)
4114 : Builder.CreateOrReduce(Vect);
4115 if (Arg != Vect)
4116 Res = Builder.CreateCast(ExtOpc, Res, II->getType());
4117 return replaceInstUsesWith(CI, Res);
4118 }
4119 }
4120 }
4121 [[fallthrough]];
4122 }
4123 case Intrinsic::vector_reduce_fmax:
4124 case Intrinsic::vector_reduce_fmin:
4125 case Intrinsic::vector_reduce_fadd:
4126 case Intrinsic::vector_reduce_fmul: {
4127 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
4128 IID != Intrinsic::vector_reduce_fmul) ||
4129 II->hasAllowReassoc();
4130 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
4131 IID == Intrinsic::vector_reduce_fmul)
4132 ? 1
4133 : 0;
4134 Value *Arg = II->getArgOperand(ArgIdx);
4135 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
4136 replaceUse(II->getOperandUse(ArgIdx), NewOp);
4137 return nullptr;
4138 }
4139 break;
4140 }
4141 case Intrinsic::is_fpclass: {
4142 if (Instruction *I = foldIntrinsicIsFPClass(*II))
4143 return I;
4144 break;
4145 }
4146 case Intrinsic::threadlocal_address: {
4147 Align MinAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
4148 MaybeAlign Align = II->getRetAlign();
4149 if (MinAlign > Align.valueOrOne()) {
4150 II->addRetAttr(Attribute::getWithAlignment(II->getContext(), MinAlign));
4151 return II;
4152 }
4153 break;
4154 }
4155 case Intrinsic::frexp: {
4156 Value *X;
4157 // The first result is idempotent with the added complication of the struct
4158 // return, and the second result is zero because the value is already
4159 // normalized.
4160 if (match(II->getArgOperand(0), m_ExtractValue<0>(m_Value(X)))) {
4161 if (match(X, m_Intrinsic<Intrinsic::frexp>(m_Value()))) {
4162 X = Builder.CreateInsertValue(
4163 X, Constant::getNullValue(II->getType()->getStructElementType(1)),
4164 1);
4165 return replaceInstUsesWith(*II, X);
4166 }
4167 }
4168 break;
4169 }
4170 case Intrinsic::get_active_lane_mask: {
4171 const APInt *Op0, *Op1;
4172 if (match(II->getOperand(0), m_StrictlyPositive(Op0)) &&
4173 match(II->getOperand(1), m_APInt(Op1))) {
4174 Type *OpTy = II->getOperand(0)->getType();
4175 return replaceInstUsesWith(
4176 *II, Builder.CreateIntrinsic(
4177 II->getType(), Intrinsic::get_active_lane_mask,
4178 {Constant::getNullValue(OpTy),
4179 ConstantInt::get(OpTy, Op1->usub_sat(*Op0))}));
4180 }
4181 break;
4182 }
4183 case Intrinsic::experimental_get_vector_length: {
4184 // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
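// For illustration: if %cnt is known to lie in [0, 8], then
// llvm.experimental.get.vector.length.i32(%cnt, i32 8, i1 false) simplifies
// to %cnt (zero-extended or truncated to the result type).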
4185 unsigned BitWidth =
4186 std::max(II->getArgOperand(0)->getType()->getScalarSizeInBits(),
4187 II->getType()->getScalarSizeInBits());
4188 ConstantRange Cnt =
4189 computeConstantRangeIncludingKnownBits(II->getArgOperand(0), false,
4190 SQ.getWithInstruction(II))
4191 .zextOrTrunc(BitWidth);
4192 ConstantRange MaxLanes = cast<ConstantInt>(II->getArgOperand(1))
4193 ->getValue()
4194 .zextOrTrunc(Cnt.getBitWidth());
4195 if (cast<ConstantInt>(II->getArgOperand(2))->isOne())
4196 MaxLanes = MaxLanes.multiply(
4197 getVScaleRange(II->getFunction(), Cnt.getBitWidth()));
4198
4199 if (Cnt.icmp(CmpInst::ICMP_ULE, MaxLanes))
4200 return replaceInstUsesWith(
4201 *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
4202 return nullptr;
4203 }
4204 default: {
4205 // Handle target specific intrinsics
4206 std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
4207 if (V)
4208 return *V;
4209 break;
4210 }
4211 }
4212
4213 // Try to fold intrinsic into select/phi operands. This is legal if:
4214 // * The intrinsic is speculatable.
4215 // * The operand is one of the following:
4216 // - a phi.
4217 // - a select with a scalar condition.
4218 // - a select with a vector condition and II is not a cross lane operation.
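// For illustration: a speculatable call such as umax(select(%c, 0, 42), %x)
// may be rewritten as select(%c, umax(0, %x), umax(42, %x)), where the first
// arm folds to %x.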
4219 if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI)) {
4220 for (Value *Op : II->args()) {
4221 if (auto *Sel = dyn_cast<SelectInst>(Op)) {
4222 bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4223 if (IsVectorCond &&
4224 (!isNotCrossLaneOperation(II) || !II->getType()->isVectorTy()))
4225 continue;
4226 // Don't replace a scalar select with a more expensive vector select if
4227 // we can't simplify both arms of the select.
4228 bool SimplifyBothArms =
4229 !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4230 if (Instruction *R = FoldOpIntoSelect(
4231 *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
4232 return R;
4233 }
4234 if (auto *Phi = dyn_cast<PHINode>(Op))
4235 if (Instruction *R = foldOpIntoPhi(*II, Phi))
4236 return R;
4237 }
4238 }
4239
4240 if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
4241 return Shuf;
4242
4243 if (Value *Reverse = foldReversedIntrinsicOperands(II))
4244 return replaceInstUsesWith(*II, Reverse);
4245
4247 return replaceInstUsesWith(*II, Res);
4248
4249 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4250 // context, so it is handled in visitCallBase and we should trigger it.
4251 return visitCallBase(*II);
4252}
4253
4254// Fence instruction simplification
4255 Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
4256 auto *NFI = dyn_cast<FenceInst>(FI.getNextNode());
4257 // This check is solely here to handle arbitrary target-dependent syncscopes.
4258 // TODO: Can remove if does not matter in practice.
4259 if (NFI && FI.isIdenticalTo(NFI))
4260 return eraseInstFromFunction(FI);
4261
4262 // Returns true if FI1 is identical or stronger fence than FI2.
4263 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4264 auto FI1SyncScope = FI1->getSyncScopeID();
4265 // Consider same scope, where scope is global or single-thread.
4266 if (FI1SyncScope != FI2->getSyncScopeID() ||
4267 (FI1SyncScope != SyncScope::System &&
4268 FI1SyncScope != SyncScope::SingleThread))
4269 return false;
4270
4271 return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
4272 };
4273 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4274 return eraseInstFromFunction(FI);
4275
4276 if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNode()))
4277 if (isIdenticalOrStrongerFence(PFI, &FI))
4278 return eraseInstFromFunction(FI);
4279 return nullptr;
4280}
4281
4282// InvokeInst simplification
4283 Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
4284 return visitCallBase(II);
4285}
4286
4287// CallBrInst simplification
4288 Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
4289 return visitCallBase(CBI);
4290}
4291
4292 static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) {
4293 if (!CI->hasFnAttr("modular-format"))
4294 return nullptr;
4295
4296 SmallVector<StringRef> Args(
4297 llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
4298 // TODO: Make use of the first two arguments
4299 unsigned FirstArgIdx;
4300 [[maybe_unused]] bool Error;
4301 Error = Args[2].getAsInteger(10, FirstArgIdx);
4302 assert(!Error && "invalid first arg index");
4303 --FirstArgIdx;
4304 StringRef FnName = Args[3];
4305 StringRef ImplName = Args[4];
4307
4308 if (AllAspects.empty())
4309 return nullptr;
4310
4311 SmallVector<StringRef> NeededAspects;
4312 for (StringRef Aspect : AllAspects) {
4313 if (Aspect == "float") {
4314 if (llvm::any_of(
4315 llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
4316 CI->arg_end()),
4317 [](Value *V) { return V->getType()->isFloatingPointTy(); }))
4318 NeededAspects.push_back("float");
4319 } else {
4320 // Unknown aspects are always considered to be needed.
4321 NeededAspects.push_back(Aspect);
4322 }
4323 }
4324
4325 if (NeededAspects.size() == AllAspects.size())
4326 return nullptr;
4327
4328 Module *M = CI->getModule();
4329 LLVMContext &Ctx = M->getContext();
4330 Function *Callee = CI->getCalledFunction();
4331 FunctionCallee ModularFn = M->getOrInsertFunction(
4332 FnName, Callee->getFunctionType(),
4333 Callee->getAttributes().removeFnAttribute(Ctx, "modular-format"));
4334 CallInst *New = cast<CallInst>(CI->clone());
4335 New->setCalledFunction(ModularFn);
4336 New->removeFnAttr("modular-format");
4337 B.Insert(New);
4338
4339 const auto ReferenceAspect = [&](StringRef Aspect) {
4340 SmallString<20> Name = ImplName;
4341 Name += '_';
4342 Name += Aspect;
4343 Function *RelocNoneFn =
4344 Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
4345 B.CreateCall(RelocNoneFn,
4346 {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))});
4347 };
4348
4349 llvm::sort(NeededAspects);
4350 for (StringRef Request : NeededAspects)
4351 ReferenceAspect(Request);
4352
4353 return New;
4354}
4355
4356Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
4357 if (!CI->getCalledFunction()) return nullptr;
4358
4359 // Skip optimizing notail and musttail calls so
4360 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4361 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4362 if (CI->isMustTailCall() || CI->isNoTailCall())
4363 return nullptr;
4364
4365 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4366 replaceInstUsesWith(*From, With);
4367 };
4368 auto InstCombineErase = [this](Instruction *I) {
4369 eraseInstFromFunction(*I);
4370 };
4371 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4372 InstCombineRAUW, InstCombineErase);
4373 if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
4374 ++NumSimplified;
4375 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4376 }
4377 if (Value *With = optimizeModularFormat(CI, Builder)) {
4378 ++NumSimplified;
4379 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4380 }
4381
4382 return nullptr;
4383}
4384
4385 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
4386 // Strip off at most one level of pointer casts, looking for an alloca. This
4387 // is good enough in practice and simpler than handling any number of casts.
4388 Value *Underlying = TrampMem->stripPointerCasts();
4389 if (Underlying != TrampMem &&
4390 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4391 return nullptr;
4392 if (!isa<AllocaInst>(Underlying))
4393 return nullptr;
4394
4395 IntrinsicInst *InitTrampoline = nullptr;
4396 for (User *U : TrampMem->users()) {
4397 IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
4398 if (!II)
4399 return nullptr;
4400 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4401 if (InitTrampoline)
4402 // More than one init_trampoline writes to this value. Give up.
4403 return nullptr;
4404 InitTrampoline = II;
4405 continue;
4406 }
4407 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4408 // Allow any number of calls to adjust.trampoline.
4409 continue;
4410 return nullptr;
4411 }
4412
4413 // No call to init.trampoline found.
4414 if (!InitTrampoline)
4415 return nullptr;
4416
4417 // Check that the alloca is being used in the expected way.
4418 if (InitTrampoline->getOperand(0) != TrampMem)
4419 return nullptr;
4420
4421 return InitTrampoline;
4422}
4423
4424 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
4425 Value *TrampMem) {
4426 // Visit all the previous instructions in the basic block, and try to find a
4427 // init.trampoline which has a direct path to the adjust.trampoline.
4428 for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4429 E = AdjustTramp->getParent()->begin();
4430 I != E;) {
4431 Instruction *Inst = &*--I;
4432 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
4433 if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4434 II->getOperand(0) == TrampMem)
4435 return II;
4436 if (Inst->mayWriteToMemory())
4437 return nullptr;
4438 }
4439 return nullptr;
4440}
4441
4442// Given a call to llvm.adjust.trampoline, find and return the corresponding
4443// call to llvm.init.trampoline if the call to the trampoline can be optimized
4444// to a direct call to a function. Otherwise return NULL.
4445 static IntrinsicInst *findInitTrampoline(Value *Callee) {
4446 Callee = Callee->stripPointerCasts();
4447 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4448 if (!AdjustTramp ||
4449 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4450 return nullptr;
4451
4452 Value *TrampMem = AdjustTramp->getOperand(0);
4453
4454 if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
4455 return IT;
4456 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4457 return IT;
4458 return nullptr;
4459}
4460
4461Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4462 const Value *Callee = Call.getCalledOperand();
4463 const auto *IPC = dyn_cast<IntToPtrInst>(Callee);
4464 if (!IPC || !IPC->isNoopCast(DL))
4465 return nullptr;
4466
4467 const auto *II = dyn_cast<IntrinsicInst>(IPC->getOperand(0));
4468 if (!II)
4469 return nullptr;
4470
4471 Intrinsic::ID IIID = II->getIntrinsicID();
4472 if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4473 return nullptr;
4474
4475 // Isolate the ptrauth bundle from the others.
4476 std::optional<OperandBundleUse> PtrAuthBundleOrNone;
4477 SmallVector<OperandBundleDef, 2> NewBundles;
4478 for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4479 OperandBundleUse Bundle = Call.getOperandBundleAt(BI);
4480 if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4481 PtrAuthBundleOrNone = Bundle;
4482 else
4483 NewBundles.emplace_back(Bundle);
4484 }
4485
4486 if (!PtrAuthBundleOrNone)
4487 return nullptr;
4488
4489 Value *NewCallee = nullptr;
4490 switch (IIID) {
4491 // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4492 // assuming the call bundle and the sign operands match.
4493 case Intrinsic::ptrauth_resign: {
4494 // Resign result key should match bundle.
4495 if (II->getOperand(3) != PtrAuthBundleOrNone->Inputs[0])
4496 return nullptr;
4497 // Resign result discriminator should match bundle.
4498 if (II->getOperand(4) != PtrAuthBundleOrNone->Inputs[1])
4499 return nullptr;
4500
4501 // Resign input (auth) key should also match: we can't change the key on
4502 // the new call we're generating, because we don't know what keys are valid.
4503 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4504 return nullptr;
4505
4506 Value *NewBundleOps[] = {II->getOperand(1), II->getOperand(2)};
4507 NewBundles.emplace_back("ptrauth", NewBundleOps);
4508 NewCallee = II->getOperand(0);
4509 break;
4510 }
4511
4512 // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4513 // assuming the call bundle and the sign operands match.
4514 // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4515 case Intrinsic::ptrauth_sign: {
4516 // Sign key should match bundle.
4517 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4518 return nullptr;
4519 // Sign discriminator should match bundle.
4520 if (II->getOperand(2) != PtrAuthBundleOrNone->Inputs[1])
4521 return nullptr;
4522 NewCallee = II->getOperand(0);
4523 break;
4524 }
4525 default:
4526 llvm_unreachable("unexpected intrinsic ID");
4527 }
4528
4529 if (!NewCallee)
4530 return nullptr;
4531
4532 NewCallee = Builder.CreateBitOrPointerCast(NewCallee, Callee->getType());
4533 CallBase *NewCall = CallBase::Create(&Call, NewBundles);
4534 NewCall->setCalledOperand(NewCallee);
4535 return NewCall;
4536}
4537
4538Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
4539 auto *CPA = dyn_cast<ConstantPtrAuth>(Call.getCalledOperand());
4540 if (!CPA)
4541 return nullptr;
4542
4543 auto *CalleeF = dyn_cast<Function>(CPA->getPointer());
4544 // If the ptrauth constant isn't based on a function pointer, bail out.
4545 if (!CalleeF)
4546 return nullptr;
4547
4548 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
4549 auto PAB = Call.getOperandBundle(LLVMContext::OB_ptrauth);
4550 if (!PAB)
4551 return nullptr;
4552
4553 auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
4554 Value *Discriminator = PAB->Inputs[1];
4555
4556 // If the bundle doesn't match, this is probably going to fail to auth.
4557 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4558 return nullptr;
4559
4560 // If the bundle matches the constant, proceed in making this a direct call.
4562 NewCall->setCalledOperand(CalleeF);
4563 return NewCall;
4564}
4565
4566bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4567 const TargetLibraryInfo *TLI) {
4568 // Note: We only handle cases which can't be driven from generic attributes
4569 // here. So, for example, nonnull and noalias (which are common properties
4570 // of some allocation functions) are expected to be handled via annotation
4571 // of the respective allocator declaration with generic attributes.
4572 bool Changed = false;
4573
4574 if (!Call.getType()->isPointerTy())
4575 return Changed;
4576
4577 std::optional<APInt> Size = getAllocSize(&Call, TLI);
4578 if (Size && *Size != 0) {
4579 // TODO: We really should just emit deref_or_null here and then
4580 // let the generic inference code combine that with nonnull.
4581 if (Call.hasRetAttr(Attribute::NonNull)) {
4582 Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
4583 Call.addRetAttr(Attribute::getWithDereferenceableBytes(
4584 Call.getContext(), Size->getLimitedValue()));
4585 } else {
4586 Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
4587 Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
4588 Call.getContext(), Size->getLimitedValue()));
4589 }
4590 }
4591
4592 // Add alignment attribute if alignment is a power of two constant.
4593 Value *Alignment = getAllocAlignment(&Call, TLI);
4594 if (!Alignment)
4595 return Changed;
4596
4597 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
4598 if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
4599 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4600 if (llvm::isPowerOf2_64(AlignmentVal)) {
4601 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4602 Align NewAlign = Align(AlignmentVal);
4603 if (NewAlign > ExistingAlign) {
4604 Call.addRetAttr(
4605 Attribute::getWithAlignment(Call.getContext(), NewAlign));
4606 Changed = true;
4607 }
4608 }
4609 }
4610 return Changed;
4611}
4612
4613/// Improvements for call, callbr and invoke instructions.
4614Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4615 bool Changed = annotateAnyAllocSite(Call, &TLI);
4616
4617 // Mark any parameters that are known to be non-null with the nonnull
4618 // attribute. This is helpful for inlining calls to functions with null
4619 // checks on their arguments.
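// For illustration: given "call void @use(ptr %p)" (a hypothetical callee)
// where %p is known non-null here, the argument gets the nonnull attribute so
// the fact survives inlining.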
4620 SmallVector<unsigned, 4> ArgNos;
4621 unsigned ArgNo = 0;
4622
4623 for (Value *V : Call.args()) {
4624 if (V->getType()->isPointerTy()) {
4625 // Simplify the nonnull operand if the parameter is known to be nonnull.
4626 // Otherwise, try to infer nonnull for it.
4627 bool HasDereferenceable = Call.getParamDereferenceableBytes(ArgNo) > 0;
4628 if (Call.paramHasAttr(ArgNo, Attribute::NonNull) ||
4629 (HasDereferenceable &&
4630 !NullPointerIsDefined(Call.getFunction(),
4631 V->getType()->getPointerAddressSpace()))) {
4632 if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4633 replaceOperand(Call, ArgNo, Res);
4634 Changed = true;
4635 }
4636 } else if (isKnownNonZero(V,
4637 getSimplifyQuery().getWithInstruction(&Call))) {
4638 ArgNos.push_back(ArgNo);
4639 }
4640 }
4641 ArgNo++;
4642 }
4643
4644 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4645
4646 if (!ArgNos.empty()) {
4647 AttributeList AS = Call.getAttributes();
4648 LLVMContext &Ctx = Call.getContext();
4649 AS = AS.addParamAttribute(Ctx, ArgNos,
4650 Attribute::get(Ctx, Attribute::NonNull));
4651 Call.setAttributes(AS);
4652 Changed = true;
4653 }
4654
4655 // If the callee is a pointer to a function, attempt to move any casts to the
4656 // arguments of the call/callbr/invoke.
4657 Value *Callee = Call.getCalledOperand();
4658 Function *CalleeF = dyn_cast<Function>(Callee);
4659 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
4660 transformConstExprCastCall(Call))
4661 return nullptr;
4662
4663 if (CalleeF) {
4664 // Remove the convergent attr on calls when the callee is not convergent.
4665 if (Call.isConvergent() && !CalleeF->isConvergent() &&
4666 !CalleeF->isIntrinsic()) {
4667 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4668 << "\n");
4669 Call.setNotConvergent();
4670 return &Call;
4671 }
4672
4673 // If the call and callee calling conventions don't match, and neither one
4674 // of the calling conventions is compatible with C calling convention
4675 // this call must be unreachable, as the call is undefined.
4676 if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4677 !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
4678 TargetLibraryInfoImpl::isCallingConvCCompatible(&Call)) &&
4679 !(Call.getCallingConv() == llvm::CallingConv::C &&
4680 TargetLibraryInfoImpl::isCallingConvCCompatible(CalleeF))) &&
4681 // Only do this for calls to a function with a body. A prototype may
4682 // not actually end up matching the implementation's calling conv for a
4683 // variety of reasons (e.g. it may be written in assembly).
4684 !CalleeF->isDeclaration()) {
4685 Instruction *OldCall = &Call;
4686 CreateNonTerminatorUnreachable(OldCall);
4687 // If OldCall does not return void then replaceInstUsesWith poison.
4688 // This allows ValueHandlers and custom metadata to adjust themselves.
4689 if (!OldCall->getType()->isVoidTy())
4690 replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
4691 if (isa<CallInst>(OldCall))
4692 return eraseInstFromFunction(*OldCall);
4693
4694 // We cannot remove an invoke or a callbr, because it would change the
4695 // CFG, just change the callee to a null pointer.
4696 cast<CallBase>(OldCall)->setCalledFunction(
4697 CalleeF->getFunctionType(),
4698 Constant::getNullValue(CalleeF->getType()));
4699 return nullptr;
4700 }
4701 }
4702
4703 // Calling a null function pointer is undefined if a null address isn't
4704 // dereferenceable.
4705 if ((isa<ConstantPointerNull>(Callee) &&
4706 !NullPointerIsDefined(Call.getFunction())) ||
4707 isa<UndefValue>(Callee)) {
4708 // If Call does not return void then replaceInstUsesWith poison.
4709 // This allows ValueHandlers and custom metadata to adjust themselves.
4710 if (!Call.getType()->isVoidTy())
4711 replaceInstUsesWith(Call, PoisonValue::get(Call.getType()));
4712
4713 if (Call.isTerminator()) {
4714 // Can't remove an invoke or callbr because we cannot change the CFG.
4715 return nullptr;
4716 }
4717
4718 // This instruction is not reachable, just remove it.
4719 CreateNonTerminatorUnreachable(&Call);
4720 return eraseInstFromFunction(Call);
4721 }
4722
4723 if (IntrinsicInst *II = findInitTrampoline(Callee))
4724 return transformCallThroughTrampoline(Call, *II);
4725
4726 // Combine calls involving pointer authentication intrinsics.
4727 if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
4728 return NewCall;
4729
4730 // Combine calls to ptrauth constants.
4731 if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
4732 return NewCall;
4733
4734 if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
4735 InlineAsm *IA = cast<InlineAsm>(Callee);
4736 if (!IA->canThrow()) {
4737 // Normal inline asm calls cannot throw - mark them
4738 // 'nounwind'.
4739 Call.setDoesNotThrow();
4740 Changed = true;
4741 }
4742 }
4743
4744 // Try to optimize the call if possible, we require DataLayout for most of
4745 // this. None of these calls are seen as possibly dead so go ahead and
4746 // delete the instruction now.
4747 if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
4748 Instruction *I = tryOptimizeCall(CI);
4749 // If we changed something return the result, etc. Otherwise let
4750 // the code fall through to the remaining checks below.
4751 if (I) return eraseInstFromFunction(*I);
4752 }
4753
4754 if (!Call.use_empty() && !Call.isMustTailCall())
4755 if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
4756 Type *CallTy = Call.getType();
4757 Type *RetArgTy = ReturnedArg->getType();
4758 if (RetArgTy->canLosslesslyBitCastTo(CallTy))
4759 return replaceInstUsesWith(
4760 Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
4761 }
4762
4763 // Drop unnecessary callee_type metadata from calls that were converted
4764 // into direct calls.
4765 if (Call.getMetadata(LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
4766 Call.setMetadata(LLVMContext::MD_callee_type, nullptr);
4767 Changed = true;
4768 }
4769
4770 // Drop unnecessary kcfi operand bundles from calls that were converted
4771 // into direct calls.
4772 auto Bundle = Call.getOperandBundle(LLVMContext::OB_kcfi);
4773 if (Bundle && !Call.isIndirectCall()) {
4774 DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
4775 if (CalleeF) {
4776 ConstantInt *FunctionType = nullptr;
4777 ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
4778
4779 if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
4780 FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
4781
4782 if (FunctionType &&
4783 FunctionType->getZExtValue() != ExpectedType->getZExtValue())
4784 dbgs() << Call.getModule()->getName()
4785 << ": warning: kcfi: " << Call.getCaller()->getName()
4786 << ": call to " << CalleeF->getName()
4787 << " using a mismatching function pointer type\n";
4788 }
4789 });
4790
4791 return CallBase::removeOperandBundle(&Call, LLVMContext::OB_kcfi);
4792 }
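// Illustrative sketch: a kcfi bundle is written as
//   call void %fp() [ "kcfi"(i32 12345678) ]
// and only constrains indirect calls, so it can be dropped once the callee
// is a known function.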
4793
4794 if (isRemovableAlloc(&Call, &TLI))
4795 return visitAllocSite(Call);
4796
4797 // Handle intrinsics which can be used in both call and invoke context.
4798 switch (Call.getIntrinsicID()) {
4799 case Intrinsic::experimental_gc_statepoint: {
4800 GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
4801 SmallPtrSet<Value *, 32> LiveGcValues;
4802 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4803 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4804
4805 // Remove the relocation if unused.
4806 if (GCR.use_empty()) {
4807 eraseInstFromFunction(GCR);
4808 continue;
4809 }
4810
4811 Value *DerivedPtr = GCR.getDerivedPtr();
4812 Value *BasePtr = GCR.getBasePtr();
4813
4814 // Undef is undef, even after relocation.
4815 if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
4816 replaceInstUsesWith(GCR, UndefValue::get(GCR.getType()));
4817 eraseInstFromFunction(GCR);
4818 continue;
4819 }
4820
4821 if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
4822 // The relocation of null will be null for most any collector.
4823 // TODO: provide a hook for this in GCStrategy. There might be some
4824 // weird collector this property does not hold for.
4825 if (isa<ConstantPointerNull>(DerivedPtr)) {
4826 // Use null-pointer of gc_relocate's type to replace it.
4827 replaceInstUsesWith(GCR, ConstantPointerNull::get(PT));
4828 eraseInstFromFunction(GCR);
4829 continue;
4830 }
4831
4832 // isKnownNonNull -> nonnull attribute
4833 if (!GCR.hasRetAttr(Attribute::NonNull) &&
4834 isKnownNonZero(DerivedPtr,
4835 getSimplifyQuery().getWithInstruction(&Call))) {
4836 GCR.addRetAttr(Attribute::NonNull);
4838 // We discovered a new fact, so re-check users.
4838 Worklist.pushUsersToWorkList(GCR);
4839 }
4840 }
4841
4842 // If we have two copies of the same pointer in the statepoint argument
4843 // list, canonicalize to one. This may let us common gc.relocates.
4844 if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
4845 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
4846 auto *OpIntTy = GCR.getOperand(2)->getType();
4847 GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
4848 }
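// Illustrative sketch of the canonicalization above: if the same pointer is
// listed twice in the statepoint's gc-live operands, say at indices 3 and 5,
// a gc.relocate carrying (token %tok, i32 3, i32 5) is rewritten to
// (token %tok, i32 3, i32 3), which lets identical relocates be commoned.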
4849
4850 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
4851 // Canonicalize on the type from the uses to the defs
4852
4853 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
4854 LiveGcValues.insert(BasePtr);
4855 LiveGcValues.insert(DerivedPtr);
4856 }
4857 std::optional<OperandBundleUse> Bundle =
4858 GCSP.getOperandBundle(LLVMContext::OB_gc_live);
4859 unsigned NumOfGCLives = LiveGcValues.size();
4860 if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
4861 break;
4862 // We can reduce the size of gc live bundle.
4863 DenseMap<Value *, unsigned> Val2Idx;
4864 std::vector<Value *> NewLiveGc;
4865 for (Value *V : Bundle->Inputs) {
4866 auto [It, Inserted] = Val2Idx.try_emplace(V);
4867 if (!Inserted)
4868 continue;
4869 if (LiveGcValues.count(V)) {
4870 It->second = NewLiveGc.size();
4871 NewLiveGc.push_back(V);
4872 } else
4873 It->second = NumOfGCLives;
4874 }
4875 // Update all gc.relocates
4876 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4877 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4878 Value *BasePtr = GCR.getBasePtr();
4879 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
4880 "Missed live gc for base pointer");
4881 auto *OpIntTy1 = GCR.getOperand(1)->getType();
4882 GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
4883 Value *DerivedPtr = GCR.getDerivedPtr();
4884 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
4885 "Missed live gc for derived pointer");
4886 auto *OpIntTy2 = GCR.getOperand(2)->getType();
4887 GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
4888 }
4889 // Create new statepoint instruction.
4890 OperandBundleDef NewBundle("gc-live", NewLiveGc);
4891 return CallBase::Create(&Call, NewBundle);
4892 }
4893 default: { break; }
4894 }
4895
4896 return Changed ? &Call : nullptr;
4897}
4898
4899/// If the callee is a constexpr cast of a function, attempt to move the cast to
4900/// the arguments of the call/invoke.
4901/// CallBrInst is not supported.
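/// Illustrative sketch (typed-pointer syntax for readability): a call such as
///   %r = call i8* bitcast (i32* (i32*)* @f to i8* (i8*)*)(i8* %p)
/// can become a direct call to @f, with pointer casts inserted around the
/// argument and the result, since those casts are lossless no-ops.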
4902bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
4903 auto *Callee =
4904 dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
4905 if (!Callee)
4906 return false;
4907
4908 assert(!isa<CallBrInst>(Call) &&
4909 "CallBr's don't have a single point after a def to insert at");
4910
4911 // Don't perform the transform for declarations, which may not be fully
4912 // accurate. For example, void @foo() is commonly used as a placeholder for
4913 // unknown prototypes.
4914 if (Callee->isDeclaration())
4915 return false;
4916
4917 // If this is a call to a thunk function, don't remove the cast. Thunks are
4918 // used to transparently forward all incoming parameters and outgoing return
4919 // values, so it's important to leave the cast in place.
4920 if (Callee->hasFnAttribute("thunk"))
4921 return false;
4922
4923 // If this is a call to a naked function, the assembly might be
4924 // using an argument or otherwise rely on the frame layout, so
4925 // the function prototype will mismatch.
4926 if (Callee->hasFnAttribute(Attribute::Naked))
4927 return false;
4928
4929 // If this is a musttail call, the callee's prototype must match the caller's
4930 // prototype with the exception of pointee types. The code below doesn't
4931 // implement that, so we can't do this transform.
4932 // TODO: Do the transform if it only requires adding pointer casts.
4933 if (Call.isMustTailCall())
4934 return false;
4935
4936 Instruction *Caller = &Call;
4937 const AttributeList &CallerPAL = Call.getAttributes();
4938
4939 // Okay, this is a cast from a function to a different type. Unless doing so
4940 // would cause a type conversion of one of our arguments, change this call to
4941 // be a direct call with arguments casted to the appropriate types.
4942 FunctionType *FT = Callee->getFunctionType();
4943 Type *OldRetTy = Caller->getType();
4944 Type *NewRetTy = FT->getReturnType();
4945
4946 // Check to see if we are changing the return type...
4947 if (OldRetTy != NewRetTy) {
4948
4949 if (NewRetTy->isStructTy())
4950 return false; // TODO: Handle multiple return values.
4951
4952 if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
4953 if (!Caller->use_empty())
4954 return false; // Cannot transform this return value.
4955 }
4956
4957 if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4958 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
4959 if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
4960 NewRetTy, CallerPAL.getRetAttrs())))
4961 return false; // Attribute not compatible with transformed value.
4962 }
4963
4964 // If the callbase is an invoke instruction, and the return value is
4965 // used by a PHI node in a successor, we cannot change the return type of
4966 // the call because there is no place to put the cast instruction (without
4967 // breaking the critical edge). Bail out in this case.
4968 if (!Caller->use_empty()) {
4969 BasicBlock *PhisNotSupportedBlock = nullptr;
4970 if (auto *II = dyn_cast<InvokeInst>(Caller))
4971 PhisNotSupportedBlock = II->getNormalDest();
4972 if (PhisNotSupportedBlock)
4973 for (User *U : Caller->users())
4974 if (PHINode *PN = dyn_cast<PHINode>(U))
4975 if (PN->getParent() == PhisNotSupportedBlock)
4976 return false;
4977 }
4978 }
4979
4980 unsigned NumActualArgs = Call.arg_size();
4981 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
4982
4983 // Prevent us turning:
4984 // declare void @takes_i32_inalloca(i32* inalloca)
4985 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
4986 //
4987 // into:
4988 // call void @takes_i32_inalloca(i32* null)
4989 //
4990 // Similarly, avoid folding away bitcasts of byval calls.
4991 if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
4992 Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
4993 return false;
4994
4995 auto AI = Call.arg_begin();
4996 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
4997 Type *ParamTy = FT->getParamType(i);
4998 Type *ActTy = (*AI)->getType();
4999
5000 if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
5001 return false; // Cannot transform this parameter value.
5002
5003 // Check if there are any incompatible attributes we cannot drop safely.
5004 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
5005 .overlaps(AttributeFuncs::typeIncompatible(
5006 ParamTy, CallerPAL.getParamAttrs(i),
5007 AttributeFuncs::ASK_UNSAFE_TO_DROP)))
5008 return false; // Attribute not compatible with transformed value.
5009
5010 if (Call.isInAllocaArgument(i) ||
5011 CallerPAL.hasParamAttr(i, Attribute::Preallocated))
5012 return false; // Cannot transform to and from inalloca/preallocated.
5013
5014 if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
5015 return false;
5016
5017 if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
5018 Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
5019 return false; // Cannot transform to or from byval.
5020 }
5021
5022 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
5023 !CallerPAL.isEmpty()) {
5024 // In this case we have more arguments than the new function type, but we
5025 // won't be dropping them. Check that these extra arguments have attributes
5026 // that are compatible with being a vararg call argument.
5027 unsigned SRetIdx;
5028 if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
5029 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
5030 return false;
5031 }
5032
5033 // Okay, we decided that this is a safe thing to do: go ahead and start
5034 // inserting cast instructions as necessary.
5035 SmallVector<Value *, 8> Args;
5036 SmallVector<AttributeSet, 8> ArgAttrs;
5037 Args.reserve(NumActualArgs);
5038 ArgAttrs.reserve(NumActualArgs);
5039
5040 // Get any return attributes.
5041 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5042
5043 // If the return value is not being used, the type may not be compatible
5044 // with the existing attributes. Wipe out any problematic attributes.
5045 RAttrs.remove(
5046 AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
5047
5048 LLVMContext &Ctx = Call.getContext();
5049 AI = Call.arg_begin();
5050 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
5051 Type *ParamTy = FT->getParamType(i);
5052
5053 Value *NewArg = *AI;
5054 if ((*AI)->getType() != ParamTy)
5055 NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
5056 Args.push_back(NewArg);
5057
5058 // Add any parameter attributes except the ones incompatible with the new
5059 // type. Note that we made sure all incompatible ones are safe to drop.
5060 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
5061 ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
5062 ArgAttrs.push_back(
5063 CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
5064 }
5065
5066 // If the function takes more arguments than the call was taking, add them
5067 // now.
5068 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
5069 Args.push_back(Constant::getNullValue(FT->getParamType(i)));
5070 ArgAttrs.push_back(AttributeSet());
5071 }
5072
5073 // If we are removing arguments to the function, emit an obnoxious warning.
5074 if (FT->getNumParams() < NumActualArgs) {
5075 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
5076 if (FT->isVarArg()) {
5077 // Add all of the arguments in their promoted form to the arg list.
5078 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
5079 Type *PTy = getPromotedType((*AI)->getType());
5080 Value *NewArg = *AI;
5081 if (PTy != (*AI)->getType()) {
5082 // Must promote to pass through va_arg area!
5083 Instruction::CastOps opcode =
5084 CastInst::getCastOpcode(*AI, false, PTy, false);
5085 NewArg = Builder.CreateCast(opcode, *AI, PTy);
5086 }
5087 Args.push_back(NewArg);
5088
5089 // Add any parameter attributes.
5090 ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
5091 }
5092 }
5093 }
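// Illustrative sketch of the promotion above: an i8 or i16 value passed in
// the variadic tail of the call is widened to i32 before being passed,
// mirroring C's default argument promotions for va_arg.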
5094
5095 AttributeSet FnAttrs = CallerPAL.getFnAttrs();
5096
5097 if (NewRetTy->isVoidTy())
5098 Caller->setName(""); // Void type should not have a name.
5099
5100 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
5101 "missing argument attributes");
5102 AttributeList NewCallerPAL = AttributeList::get(
5103 Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
5104
5105 SmallVector<OperandBundleDef, 1> OpBundles;
5106 Call.getOperandBundlesAsDefs(OpBundles);
5107
5108 CallBase *NewCall;
5109 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
5110 NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
5111 II->getUnwindDest(), Args, OpBundles);
5112 } else {
5113 NewCall = Builder.CreateCall(Callee, Args, OpBundles);
5114 cast<CallInst>(NewCall)->setTailCallKind(
5115 cast<CallInst>(Caller)->getTailCallKind());
5116 }
5117 NewCall->takeName(Caller);
5118 NewCall->setCallingConv(Call.getCallingConv());
5119 NewCall->setAttributes(NewCallerPAL);
5120
5121 // Preserve prof metadata if any.
5122 NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
5123
5124 // Insert a cast of the return type as necessary.
5125 Instruction *NC = NewCall;
5126 Value *NV = NC;
5127 if (OldRetTy != NV->getType() && !Caller->use_empty()) {
5128 assert(!NV->getType()->isVoidTy());
5129 NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
5130 NC->setDebugLoc(Caller->getDebugLoc());
5131
5132 auto OptInsertPt = NewCall->getInsertionPointAfterDef();
5133 assert(OptInsertPt && "No place to insert cast");
5134 InsertNewInstBefore(NC, *OptInsertPt);
5135 Worklist.pushUsersToWorkList(*Caller);
5136 }
5137
5138 if (!Caller->use_empty())
5139 replaceInstUsesWith(*Caller, NV);
5140 else if (Caller->hasValueHandle()) {
5141 if (OldRetTy == NV->getType())
5142 ValueHandleBase::ValueIsRAUWd(Caller, NV);
5143 else
5144 // We cannot call ValueIsRAUWd with a different type, and the
5145 // actual tracked value will disappear.
5146 ValueHandleBase::ValueIsDeleted(Caller);
5147 }
5148
5149 eraseInstFromFunction(*Caller);
5150 return true;
5151}
5152
5153/// Turn a call to a function created by init_trampoline / adjust_trampoline
5154/// intrinsic pair into a direct call to the underlying function.
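/// Illustrative sketch: for
///   %p = call ptr @llvm.adjust.trampoline(ptr %tramp)
///   call void %p(i32 7)
/// where %tramp was filled in by llvm.init.trampoline(ptr %tramp, ptr @f,
/// ptr %chain), the call is rewritten to a direct call to @f with %chain
/// spliced in as the 'nest' argument.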
5155 Instruction *
5156 InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
5157 IntrinsicInst &Tramp) {
5158 FunctionType *FTy = Call.getFunctionType();
5159 AttributeList Attrs = Call.getAttributes();
5160
5161 // If the call already has the 'nest' attribute somewhere then give up -
5162 // otherwise 'nest' would occur twice after splicing in the chain.
5163 if (Attrs.hasAttrSomewhere(Attribute::Nest))
5164 return nullptr;
5165
5166 Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
5167 FunctionType *NestFTy = NestF->getFunctionType();
5168
5169 AttributeList NestAttrs = NestF->getAttributes();
5170 if (!NestAttrs.isEmpty()) {
5171 unsigned NestArgNo = 0;
5172 Type *NestTy = nullptr;
5173 AttributeSet NestAttr;
5174
5175 // Look for a parameter marked with the 'nest' attribute.
5176 for (FunctionType::param_iterator I = NestFTy->param_begin(),
5177 E = NestFTy->param_end();
5178 I != E; ++NestArgNo, ++I) {
5179 AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
5180 if (AS.hasAttribute(Attribute::Nest)) {
5181 // Record the parameter type and any other attributes.
5182 NestTy = *I;
5183 NestAttr = AS;
5184 break;
5185 }
5186 }
5187
5188 if (NestTy) {
5189 std::vector<Value*> NewArgs;
5190 std::vector<AttributeSet> NewArgAttrs;
5191 NewArgs.reserve(Call.arg_size() + 1);
5192 NewArgAttrs.reserve(Call.arg_size());
5193
5194 // Insert the nest argument into the call argument list, which may
5195 // mean appending it. Likewise for attributes.
5196
5197 {
5198 unsigned ArgNo = 0;
5199 auto I = Call.arg_begin(), E = Call.arg_end();
5200 do {
5201 if (ArgNo == NestArgNo) {
5202 // Add the chain argument and attributes.
5203 Value *NestVal = Tramp.getArgOperand(2);
5204 if (NestVal->getType() != NestTy)
5205 NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
5206 NewArgs.push_back(NestVal);
5207 NewArgAttrs.push_back(NestAttr);
5208 }
5209
5210 if (I == E)
5211 break;
5212
5213 // Add the original argument and attributes.
5214 NewArgs.push_back(*I);
5215 NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
5216
5217 ++ArgNo;
5218 ++I;
5219 } while (true);
5220 }
5221
5222 // The trampoline may have been bitcast to a bogus type (FTy).
5223 // Handle this by synthesizing a new function type, equal to FTy
5224 // with the chain parameter inserted.
5225
5226 std::vector<Type*> NewTypes;
5227 NewTypes.reserve(FTy->getNumParams()+1);
5228
5229 // Insert the chain's type into the list of parameter types, which may
5230 // mean appending it.
5231 {
5232 unsigned ArgNo = 0;
5233 FunctionType::param_iterator I = FTy->param_begin(),
5234 E = FTy->param_end();
5235
5236 do {
5237 if (ArgNo == NestArgNo)
5238 // Add the chain's type.
5239 NewTypes.push_back(NestTy);
5240
5241 if (I == E)
5242 break;
5243
5244 // Add the original type.
5245 NewTypes.push_back(*I);
5246
5247 ++ArgNo;
5248 ++I;
5249 } while (true);
5250 }
5251
5252 // Replace the trampoline call with a direct call. Let the generic
5253 // code sort out any function type mismatches.
5254 FunctionType *NewFTy =
5255 FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
5256 AttributeList NewPAL =
5257 AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
5258 Attrs.getRetAttrs(), NewArgAttrs);
5259
5260 SmallVector<OperandBundleDef, 1> OpBundles;
5261 Call.getOperandBundlesAsDefs(OpBundles);
5262
5263 Instruction *NewCaller;
5264 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
5265 NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
5266 II->getUnwindDest(), NewArgs, OpBundles);
5267 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
5268 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
5269 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
5270 NewCaller =
5271 CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
5272 CBI->getIndirectDests(), NewArgs, OpBundles);
5273 cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
5274 cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
5275 } else {
5276 NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
5277 cast<CallInst>(NewCaller)->setTailCallKind(
5278 cast<CallInst>(Call).getTailCallKind());
5279 cast<CallInst>(NewCaller)->setCallingConv(
5280 cast<CallInst>(Call).getCallingConv());
5281 cast<CallInst>(NewCaller)->setAttributes(NewPAL);
5282 }
5283 NewCaller->setDebugLoc(Call.getDebugLoc());
5284
5285 return NewCaller;
5286 }
5287 }
5288
5289 // Replace the trampoline call with a direct call. Since there is no 'nest'
5290 // parameter, there is no need to adjust the argument list. Let the generic
5291 // code sort out any function type mismatches.
5292 Call.setCalledFunction(FTy, NestF);
5293 return &Call;
5294}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
@ Scaled
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
BitTracker BT
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
#define Check(C,...)
#define DEBUG_TYPE
IRTranslator LLVM IR MI
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass though a va_arg area.
static Instruction * createOverflowTuple(IntrinsicInst *II, Value *Result, Constant *Overflow)
Creates a result tuple for an overflow intrinsic II with a given Result and a constant Overflow value...
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
static bool removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, std::function< bool(const IntrinsicInst &)> IsStart)
static bool inputDenormalIsDAZ(const Function &F, const Type *Ty)
static Instruction * reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If this min/max has a matching min/max operand with a constant, try to push the constant operand into...
static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID)
Helper to match idempotent binary intrinsics, namely, intrinsics where f(f(x, y), y) == f(x,...
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, const SimplifyQuery &SQ)
Return true if two values Op0 and Op1 are known to have the same sign.
static Value * optimizeModularFormat(CallInst *CI, IRBuilderBase &B)
static Instruction * moveAddAfterMinMax(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0.
static Instruction * simplifyInvariantGroupIntrinsic(IntrinsicInst &II, InstCombinerImpl &IC)
This function transforms launder.invariant.group and strip.invariant.group like: launder(launder(x)) ...
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
static std::optional< bool > getKnownSign(Value *Op, const SimplifyQuery &SQ)
static cl::opt< unsigned > GuardWideningWindow("instcombine-guard-widening-window", cl::init(3), cl::desc("How wide an instruction window to bypass looking for " "another guard"))
static bool hasUndefSource(AnyMemTransferInst *MI)
Recognize a memcpy/memmove from a trivially otherwise unused alloca.
static Instruction * factorizeMinMaxTree(IntrinsicInst *II)
Reduce a sequence of min/max intrinsics with a common operand.
static Instruction * foldClampRangeOfTwo(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If we have a clamp pattern like max (min X, 42), 41 – where the output can only be one of two possibl...
static Value * simplifyReductionOperand(Value *Arg, bool CanReorderLanes)
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
static Value * foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
static std::optional< bool > getKnownSignOrZero(Value *Op, const SimplifyQuery &SQ)
static Value * foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, const DataLayout &DL, InstCombiner::BuilderTy &Builder)
Fold an unsigned minimum of trailing or leading zero bits counts: umin(cttz(CtOp, ZeroUndef),...
static Value * foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC, IntrinsicInst *II)
Attempt to simplify value-accumulating recurrences of kind: umax.acc = phi i8 [ umax,...
static Instruction * foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC)
static Instruction * simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC, bool IsExtension)
Convert tbl/tbx intrinsics to shufflevector if the mask is constant, and at most two source operands ...
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC)
static IntrinsicInst * findInitTrampoline(Value *Callee)
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, const Function &F, Type *Ty)
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static Value * reassociateMinMaxWithConstants(IntrinsicInst *II, IRBuilderBase &Builder, const SimplifyQuery &SQ)
If this min/max has a constant operand and an operand that is a matching min/max with a constant oper...
static CallInst * canonicalizeConstantArg0ToArg1(CallInst &Call)
static Instruction * foldNeonShift(IntrinsicInst *II, InstCombinerImpl &IC)
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
static bool hasNoSignedWrap(BinaryOperator &I)
static bool inputDenormalIsIEEE(DenormalMode Mode)
Return true if it's possible to assume IEEE treatment of input denormals in F for Val.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const Function * getCalledFunction(const Value *V)
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
const SmallVectorImpl< MachineOperand > & Cond
This file implements the SmallBitVector class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
bool isNegative() const
Definition APFloat.h:1431
void clearSign()
Definition APFloat.h:1280
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1070
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1202
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1948
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1112
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1928
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1935
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition APInt.cpp:2036
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1941
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
Definition APSInt.h:312
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
Definition APSInt.h:304
This class represents any memset intrinsic.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:195
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
static LLVM_ABI Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI bool isSigned() const
Whether the intrinsic is signed or unsigned.
LLVM_ABI Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
static BinaryOperator * CreateFAddFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:236
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNSW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:279
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:294
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:244
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:248
static BinaryOperator * CreateFSubFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:240
static LLVM_ABI BinaryOperator * CreateNSWNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
void setDoesNotThrow()
MaybeAlign getRetAlign() const
Extract the alignment of the return value.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
OperandBundleUse getOperandBundleAt(unsigned Index) const
Return the operand bundle at a specific index.
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
bool hasRetAttr(Attribute::AttrKind Kind) const
Determine whether the return value has the given attribute.
unsigned getNumOperandBundles() const
Return the number of operand bundles associated with this User.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
void setNotConvergent()
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
bool doesNotThrow() const
Determine if the call cannot unwind.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
bool isConvergent() const
Determine if the invoke is convergent.
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
Value * getReturnedArgOperand() const
If one of the arguments has the 'returned' attribute, returns its operand value.
static LLVM_ABI CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, InsertPosition InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
bool hasOperandBundles() const
Return true if this User has any operand bundles.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
static CallBrInst * Create(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef< BasicBlock * > IndirectDests, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This class represents a function call, abstracting a target machine's calling convention.
bool isNoTailCall() const
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
bool isMustTailCall() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:871
Predicate getUnorderedPredicate() const
Definition InstrTypes.h:811
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI ConstantPtrAuth * get(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, Constant *AddrDisc, Constant *DeactivationSymbol)
Return a pointer signed with the specified parameters.
This class represents a range of values.
LLVM_ABI ConstantRange multiply(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a multiplication of a value in thi...
LLVM_ABI ConstantRange zextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const
Does the predicate Pred hold between ranges this and Other?
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
unsigned size() const
Definition DenseMap.h:110
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static FMFSource intersect(Value *A, Value *B)
Intersect the FMF from two instructions.
Definition IRBuilder.h:107
This class represents an extension of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
void setNoSignedZeros(bool B=true)
Definition FMF.h:84
bool allowReassoc() const
Flag queries.
Definition FMF.h:64
An instruction for ordering other memory operations.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this fence instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this fence instruction.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type::subtype_iterator param_iterator
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool isConvergent() const
Determine if the call is convergent.
Definition Function.h:610
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:594
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
Definition Function.h:249
LLVM_ABI Value * getBasePtr() const
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
LLVM_ABI Value * getDerivedPtr() const
unsigned getDerivedPtrIndex() const
The index into the associate statepoint's argument list which contains the pointer whose relocation t...
std::vector< const GCRelocateInst * > getGCRelocates() const
Get list of all gc reloactes linked to this statepoint May contain several relocations for the same b...
Definition Statepoint.h:206
MDNode * getMetadata(unsigned KindID) const
Get the current metadata attachments for the given kind, if any.
Definition Value.h:576
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:328
PointerType * getType() const
Global values are always pointers.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI Value * CreateLaunderInvariantGroup(Value *Ptr)
Create a launder.invariant.group intrinsic call.
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:502
LLVM_ABI Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1420
LLVM_ABI CallInst * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2085
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2607
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:507
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2442
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2212
LLVM_ABI Value * CreateStripInvariantGroup(Value *Ptr)
Create a strip.invariant.group intrinsic call.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
KnownFPClass computeKnownFPClass(Value *Val, FastMathFlags FMF, FPClassTest Interested=fcAllFlags, const Instruction *CtxI=nullptr, unsigned Depth=0) const
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * SimplifyAnyMemSet(AnyMemSetInst *MI)
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitCallBrInst(CallBrInst &CBI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Value * foldReversedIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are reverses, try to pull the reverse after the intrinsic.
Value * tryGetLog2(Value *Op, bool AssumeNonZero)
Instruction * visitFenceInst(FenceInst &FI)
Instruction * foldShuffledIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are unary shuffles with the same mask, try to shuffle after the int...
Instruction * visitInvokeInst(InvokeInst &II)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Instruction * visitVAEndInst(VAEndInst &I)
Instruction * matchBSwapOrBitReverse(Instruction &I, bool MatchBSwaps, bool MatchBitReversals)
Given an initial instruction, check to see if it is the root of a bswap/bitreverse idiom.
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * SimplifyAnyMemTransfer(AnyMemTransferInst *MI)
OverflowResult computeOverflow(Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS, Instruction *CxtI) const
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
The core instruction combiner logic.
SimplifyQuery SQ
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
DominatorTree & getDominatorTree() const
BlockFrequencyInfo * BFI
TargetLibraryInfo & TLI
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
const DataLayout & DL
DomConditionCache DC
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
ProfileSummaryInfo * PSI
BuilderTy & Builder
AssumptionCache & getAssumptionCache() const
OptimizationRemarkEmitter & ORE
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
const SimplifyQuery & getSimplifyQuery() const
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI std::optional< InstListType::iterator > getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Metadata node.
Definition Metadata.h:1078
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:608
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:104
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
bool isSigned() const
Whether the intrinsic is signed or unsigned.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
StringRef getName() const
Get a short "name" for the module.
Definition Module.h:269
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition Operator.h:43
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:111
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:105
bool isCommutative() const
Return true if the instruction is commutative.
Definition Operator.h:128
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Represents a saturating add/sub intrinsic.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Class to represent struct types.
static LLVM_ABI bool isCallingConvCCompatible(CallBase *CI)
Returns true if call site / callee has cdecl-compatible calling conventions.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
LLVM_ABI bool canLosslesslyBitCastTo(Type *Ty) const
Return true if this type could be converted with a lossless BitCast to type 'Ty'.
Definition Type.cpp:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
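A sketch (hypothetical predicate) combining the Type queries above the way integer-width checks typically combine them:
#include "llvm/IR/Type.h"
// True for iN and <k x iN> (fixed or scalable) with N <= MaxBits.
static bool isNarrowIntOrIntVector(llvm::Type *Ty, unsigned MaxBits) {
  llvm::Type *Scalar = Ty->getScalarType();
  return Scalar->isIntegerTy() && Scalar->getScalarSizeInBits() <= MaxBits;
}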
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:139
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:147
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
void setOperand(unsigned i, Value *Val)
Definition User.h:238
Value * getOperand(unsigned i) const
Definition User.h:233
This represents the llvm.va_end intrinsic.
static LLVM_ABI void ValueIsDeleted(Value *V)
Definition Value.cpp:1233
static LLVM_ABI void ValueIsRAUWd(Value *Old, Value *New)
Definition Value.cpp:1286
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
static constexpr uint64_t MaximumAlignment
Definition Value.h:830
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
static LLVM_ABI void dropDroppableUse(Use &U)
Remove the droppable use U.
Definition Value.cpp:226
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:708
bool use_empty() const
Definition Value.h:346
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1106
static constexpr unsigned MaxAlignmentExponent
The maximum alignment for instructions.
Definition Value.h:829
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
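A small sketch (hypothetical helper) combining the Value queries above:
#include "llvm/IR/Value.h"
// True if, after stripping pointer casts and all-zero GEPs, the underlying
// value has exactly one use.
static bool strippedValueHasOneUse(const llvm::Value *Ptr) {
  return Ptr->stripPointerCasts()->hasOneUse();
}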
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
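A short sketch of combining the VectorType and ElementCount factories above (the helper name and parameters are hypothetical):
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
// Returns <4 x i32> when Scalable is false, <vscale x 4 x i32> when true.
static llvm::VectorType *makeI32Vec4(llvm::LLVMContext &Ctx, bool Scalable) {
  return llvm::VectorType::get(llvm::Type::getInt32Ty(Ctx),
                               llvm::ElementCount::get(4, Scalable));
}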
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match only the specific value given.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
OverflowingBinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWNeg(const ValTy &V)
Matches a 'Neg' as 'sub nsw 0, V'.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cstfp_pred_ty< is_neg_zero_fp > m_NegZeroFP()
Match a floating-point negative zero.
specific_fpval m_SpecificFP(double V)
Match a specific floating point value or vector with all elements equal to the value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_strictlypositive > m_StrictlyPositive()
Match an integer or vector of strictly positive values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match an integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
cst_pred_ty< custom_checkfn< APInt > > m_CheckedInt(function_ref< bool(const APInt &)> CheckFn)
Match an integer or vector where CheckFn(ele) for each element is true.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty, true >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty, true > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty, true >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty, true > > > m_c_MaxOrMin(const LHS &L, const RHS &R)
class_match< UnaryOperator > m_UnOp()
Match an arbitrary unary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWSub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Exact_match< T > m_Exact(const T &SubPattern)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > > > m_MaxOrMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_CopySign(const Opnd0 &Op0, const Opnd1 &Op1)
MatchFunctor< Val, Pattern > match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
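The pattern-match helpers listed above are the combinators used throughout this file. A minimal sketch of how they compose (the helper name isAddOfConstant and its Value *V parameter are illustrative assumptions, not code from this file):
#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
// Returns true if V is "add X, C" where C is a ConstantInt or a splatted
// vector constant; X and C are bound as a side effect of the match.
static bool isAddOfConstant(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  llvm::Value *X;
  const llvm::APInt *C;
  return match(V, m_Add(m_Value(X), m_APInt(C)));
}
match() returns true and binds X and C only when the whole pattern applies, which is what makes this one-line idiom safe to use directly inside an if condition.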
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:195
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:667
constexpr double e
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
@ NeverOverflows
Never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI Value * simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for an FMul, fold the result or return null.
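A sketch of invoking this fold (the wrapper name is hypothetical; the SimplifyQuery is assumed to be supplied by the caller):
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/FMF.h"
// Returns the folded value for "fmul L, R" under the flags FMF, or nullptr
// if InstSimplify cannot fold it.
static llvm::Value *tryFoldFMul(llvm::Value *L, llvm::Value *R,
                                llvm::FastMathFlags FMF,
                                const llvm::SimplifyQuery &Q) {
  return llvm::simplifyFMulInst(L, R, FMF, Q);
}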
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
LLVM_ABI RetainedKnowledge simplifyRetainedKnowledge(AssumeInst *Assume, RetainedKnowledge RK, AssumptionCache *AC, DominatorTree *DT)
canonicalize the RetainedKnowledge RK.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI Value * getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI)
Gets the alignment argument for an aligned_alloc-like function, using either built-in knowledge based...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI RetainedKnowledge getKnowledgeFromOperandInAssume(AssumeInst &Assume, unsigned Idx)
Retrieve the information held by Assume on the operand at index Idx.
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1625
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool isAssumeWithEmptyBundle(const AssumeInst &Assume)
Return true iff the operand bundles of the provided llvm.assume don't contain any valuable informat...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
LLVM_ABI RetainedKnowledge getKnowledgeFromBundle(AssumeInst &Assume, const CallBase::BundleOpInfo &BOI)
This extracts the Knowledge from an element of an operand bundle.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
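A sketch of using getKnownAlignment together with Log2 from this index (the helper name is hypothetical; a pointer value P and the module's DataLayout are assumed to be in scope at the call site):
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Transforms/Utils/Local.h"
// Log2 of the best alignment that can be inferred for the pointer P.
static unsigned knownAlignLog2(llvm::Value *P, const llvm::DataLayout &DL) {
  return llvm::Log2(llvm::getKnownAlignment(P, DL));
}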
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1580
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_NABS
Negated absolute value.
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1525
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI bool matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I, PHINode *&P, Value *&Init, Value *&OtherOp)
Attempt to match a simple value-accumulating recurrence of the form: llvm.intrinsic....
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1775
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
LLVM_ABI AssumeInst * buildAssumeFromKnowledge(ArrayRef< RetainedKnowledge > Knowledge, Instruction *CtxI, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Build and return a new assume created from the provided knowledge if the knowledge in the assume is f...
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isNotCrossLaneOperation(const Instruction *I)
Return true if the instruction doesn't potentially cross vector lanes.
LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
LLVM_ABI Value * simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for the multiplication of a FMA, fold the result or return null.
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI Value * simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q)
Given a constrained FP intrinsic call, tries to compute its simplified version.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1561
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
@ Add
Sum of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI ConstantRange computeConstantRangeIncludingKnownBits(const WithCache< const Value * > &V, bool ForSigned, const SimplifyQuery &SQ)
Combine constant ranges from computeConstantRange() and computeKnownBits().
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Like isSafeToSpeculativelyExecute, but doesn't use information from the instruction's non-constant operands.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
constexpr unsigned BitWidth
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
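A sketch (hypothetical classifier) of how the two i1-mask helpers above are typically used together when simplifying masked intrinsics:
#include "llvm/Analysis/VectorUtils.h"
// +1: every lane is known active, -1: every lane is known inactive,
//  0: the mask is mixed or unknown.
static int classifyMask(llvm::Value *Mask) {
  if (llvm::maskIsAllOneOrUndef(Mask))
    return 1;
  if (llvm::maskIsAllZeroOrUndef(Mask))
    return -1;
  return 0;
}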
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
LLVM_ABI std::optional< APInt > getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref< const Value *(const Value *)> Mapper=[](const Value *V) { return V;})
Return the size of the requested allocation.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if any of the elements of this predicate mask are known to be ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1598
LLVM_ABI bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW=false, bool AllowPoison=true)
Return true if the two given values are negations of each other.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI std::optional< bool > computeKnownFPSignBit(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return false if the specified FP value's sign bit is known to be 0, true if it is known to be 1, and std::nullopt otherwise.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define NC
Definition regutils.h:42
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:761
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ IEEE
IEEE-754 denormal numbers preserved.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:242
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:274
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:289
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
bool isNonZero() const
Returns true if this value is known to be non-zero.
Definition KnownBits.h:111
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:248
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:280
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:286
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:83
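The KnownBits accessors listed above are typically populated through computeKnownBits (declared earlier in this index). A minimal sketch, assuming an integer-typed value Op and a DataLayout DL are available at the call site (the helper name is hypothetical):
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/KnownBits.h"
// True if the low N bits of the integer value Op are known to be zero.
static bool lowBitsKnownZero(const llvm::Value *Op, const llvm::DataLayout &DL,
                             unsigned N) {
  llvm::KnownBits Known(Op->getType()->getScalarSizeInBits());
  llvm::computeKnownBits(Op, Known, DL);
  return Known.countMinTrailingZeros() >= N;
}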
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
Matching combinators.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
A lightweight accessor for an operand bundle meant to be passed around by value.
StringRef getTagName() const
Return the tag of this operand bundle as a string.
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
ArrayRef< Use > Inputs
Represent one information held inside an operand bundle of an llvm.assume.
Attribute::AttrKind AttrKind
SelectPatternFlavor Flavor
const DataLayout & DL
const Instruction * CxtI
SimplifyQuery getWithInstruction(const Instruction *I) const