LLVM 23.0.0git
InstCombineCalls.cpp
Go to the documentation of this file.
1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/Statistic.h"
27#include "llvm/Analysis/Loads.h"
32#include "llvm/IR/Attributes.h"
33#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/Constant.h"
36#include "llvm/IR/Constants.h"
37#include "llvm/IR/DataLayout.h"
38#include "llvm/IR/DebugInfo.h"
40#include "llvm/IR/Function.h"
42#include "llvm/IR/InlineAsm.h"
43#include "llvm/IR/InstrTypes.h"
44#include "llvm/IR/Instruction.h"
47#include "llvm/IR/Intrinsics.h"
48#include "llvm/IR/IntrinsicsAArch64.h"
49#include "llvm/IR/IntrinsicsAMDGPU.h"
50#include "llvm/IR/IntrinsicsARM.h"
51#include "llvm/IR/IntrinsicsHexagon.h"
52#include "llvm/IR/LLVMContext.h"
53#include "llvm/IR/Metadata.h"
56#include "llvm/IR/Statepoint.h"
57#include "llvm/IR/Type.h"
58#include "llvm/IR/User.h"
59#include "llvm/IR/Value.h"
60#include "llvm/IR/ValueHandle.h"
65#include "llvm/Support/Debug.h"
76#include <algorithm>
77#include <cassert>
78#include <cstdint>
79#include <optional>
80#include <utility>
81#include <vector>
82
83#define DEBUG_TYPE "instcombine"
85
86using namespace llvm;
87using namespace PatternMatch;
88
89STATISTIC(NumSimplified, "Number of library calls simplified");
90
92 "instcombine-guard-widening-window",
93 cl::init(3),
94 cl::desc("How wide an instruction window to bypass looking for "
95 "another guard"));
96
97/// Return the specified type promoted as it would be to pass through a va_arg
98/// area.
100 if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
101 if (ITy->getBitWidth() < 32)
102 return Type::getInt32Ty(Ty->getContext());
103 }
104 return Ty;
105}
106
107/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
108/// TODO: This should probably be integrated with visitAllocSites, but that
109/// requires a deeper change to allow either unread or unwritten objects.
111 auto *Src = MI->getRawSource();
112 while (isa<GetElementPtrInst>(Src)) {
113 if (!Src->hasOneUse())
114 return false;
115 Src = cast<Instruction>(Src)->getOperand(0);
116 }
117 return isa<AllocaInst>(Src) && Src->hasOneUse();
118}
119
121 Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
122 MaybeAlign CopyDstAlign = MI->getDestAlign();
123 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
124 MI->setDestAlignment(DstAlign);
125 return MI;
126 }
127
128 Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
129 MaybeAlign CopySrcAlign = MI->getSourceAlign();
130 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
131 MI->setSourceAlignment(SrcAlign);
132 return MI;
133 }
134
135 // If we have a store to a location which is known constant, we can conclude
136 // that the store must be storing the constant value (else the memory
137 // wouldn't be constant), and this must be a noop.
138 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
139 // Set the size of the copy to 0, it will be deleted on the next iteration.
140 MI->setLength((uint64_t)0);
141 return MI;
142 }
143
144 // If the source is provably undef, the memcpy/memmove doesn't do anything
145 // (unless the transfer is volatile).
146 if (hasUndefSource(MI) && !MI->isVolatile()) {
147 // Set the size of the copy to 0, it will be deleted on the next iteration.
148 MI->setLength((uint64_t)0);
149 return MI;
150 }
151
152 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
153 // load/store.
154 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
155 if (!MemOpLength) return nullptr;
156
157 // Source and destination pointer types are always "i8*" for intrinsic. See
158 // if the size is something we can handle with a single primitive load/store.
159 // A single load+store correctly handles overlapping memory in the memmove
160 // case.
161 uint64_t Size = MemOpLength->getLimitedValue();
162 assert(Size && "0-sized memory transferring should be removed already.");
163
164 if (Size > 8 || (Size&(Size-1)))
165 return nullptr; // If not 1/2/4/8 bytes, exit.
166
167 // If it is an atomic and alignment is less than the size then we will
168 // introduce the unaligned memory access which will be later transformed
169 // into libcall in CodeGen. This is not evident performance gain so disable
170 // it now.
171 if (MI->isAtomic())
172 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
173 return nullptr;
174
175 // Use an integer load+store unless we can find something better.
176 IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
177
178 // If the memcpy has metadata describing the members, see if we can get the
179 // TBAA, scope and noalias tags describing our copy.
180 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
181
182 Value *Src = MI->getArgOperand(1);
183 Value *Dest = MI->getArgOperand(0);
184 LoadInst *L = Builder.CreateLoad(IntType, Src);
185 // Alignment from the mem intrinsic will be better, so use it.
186 L->setAlignment(*CopySrcAlign);
187 L->setAAMetadata(AACopyMD);
188 MDNode *LoopMemParallelMD =
189 MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
190 if (LoopMemParallelMD)
191 L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
192 MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
193 if (AccessGroupMD)
194 L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
195
196 StoreInst *S = Builder.CreateStore(L, Dest);
197 // Alignment from the mem intrinsic will be better, so use it.
198 S->setAlignment(*CopyDstAlign);
199 S->setAAMetadata(AACopyMD);
200 if (LoopMemParallelMD)
201 S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
202 if (AccessGroupMD)
203 S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
204 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
205
206 if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
207 // non-atomics can be volatile
208 L->setVolatile(MT->isVolatile());
209 S->setVolatile(MT->isVolatile());
210 }
211 if (MI->isAtomic()) {
212 // atomics have to be unordered
213 L->setOrdering(AtomicOrdering::Unordered);
215 }
216
217 // Set the size of the copy to 0, it will be deleted on the next iteration.
218 MI->setLength((uint64_t)0);
219 return MI;
220}
221
223 const Align KnownAlignment =
224 getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
225 MaybeAlign MemSetAlign = MI->getDestAlign();
226 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
227 MI->setDestAlignment(KnownAlignment);
228 return MI;
229 }
230
231 // If we have a store to a location which is known constant, we can conclude
232 // that the store must be storing the constant value (else the memory
233 // wouldn't be constant), and this must be a noop.
234 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
235 // Set the size of the copy to 0, it will be deleted on the next iteration.
236 MI->setLength((uint64_t)0);
237 return MI;
238 }
239
240 // Remove memset with an undef value.
241 // FIXME: This is technically incorrect because it might overwrite a poison
242 // value. Change to PoisonValue once #52930 is resolved.
243 if (isa<UndefValue>(MI->getValue())) {
244 // Set the size of the copy to 0, it will be deleted on the next iteration.
245 MI->setLength((uint64_t)0);
246 return MI;
247 }
248
249 // Extract the length and alignment and fill if they are constant.
250 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
251 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
252 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
253 return nullptr;
254 const uint64_t Len = LenC->getLimitedValue();
255 assert(Len && "0-sized memory setting should be removed already.");
256 const Align Alignment = MI->getDestAlign().valueOrOne();
257
258 // If it is an atomic and alignment is less than the size then we will
259 // introduce the unaligned memory access which will be later transformed
260 // into libcall in CodeGen. This is not evident performance gain so disable
261 // it now.
262 if (MI->isAtomic() && Alignment < Len)
263 return nullptr;
264
265 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
266 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
267 Value *Dest = MI->getDest();
268
269 // Extract the fill value and store.
270 Constant *FillVal = ConstantInt::get(
271 MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
272 StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
273 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
274 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(S)) {
275 if (llvm::is_contained(DbgAssign->location_ops(), FillC))
276 DbgAssign->replaceVariableLocationOp(FillC, FillVal);
277 }
278
279 S->setAlignment(Alignment);
280 if (MI->isAtomic())
282
283 // Set the size of the copy to 0, it will be deleted on the next iteration.
284 MI->setLength((uint64_t)0);
285 return MI;
286 }
287
288 return nullptr;
289}
290
291// TODO, Obvious Missing Transforms:
292// * Narrow width by halfs excluding zero/undef lanes
293Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
294 Value *LoadPtr = II.getArgOperand(0);
295 const Align Alignment = II.getParamAlign(0).valueOrOne();
296 Value *Mask = II.getArgOperand(1);
297
298 // If the mask is all ones or poison, this is a plain vector load of the 1st
299 // argument.
300 if (match(Mask, m_AllOnesOrPoison())) {
301 LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
302 "unmaskedload");
303 L->copyMetadata(II);
304 return L;
305 }
306
307 // If we can unconditionally load from this address, replace with a
308 // load/select idiom.
309 if (isDereferenceablePointer(LoadPtr, II.getType(),
311 LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
312 "unmaskedload");
313 LI->copyMetadata(II);
314 return Builder.CreateSelect(II.getArgOperand(1), LI, II.getArgOperand(2));
315 }
316
317 return nullptr;
318}
319
320// TODO, Obvious Missing Transforms:
321// * Single constant active lane -> store
322// * Narrow width by halfs excluding zero/undef lanes
323Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
324 Value *StorePtr = II.getArgOperand(1);
325 Align Alignment = II.getParamAlign(1).valueOrOne();
326 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
327 if (!ConstMask)
328 return nullptr;
329
330 // If the mask is all zeros or poison, this instruction does nothing.
331 if (match(ConstMask, m_ZeroOrPoison()))
333
334 // If the mask is all ones or poison, this is a plain vector store of the 1st
335 // argument.
336 if (match(ConstMask, m_AllOnesOrPoison())) {
337 StoreInst *S =
338 new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
339 S->copyMetadata(II);
340 return S;
341 }
342
343 if (isa<ScalableVectorType>(ConstMask->getType()))
344 return nullptr;
345
346 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
347 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
348 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
349 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
350 PoisonElts))
351 return replaceOperand(II, 0, V);
352
353 return nullptr;
354}
355
356// TODO, Obvious Missing Transforms:
357// * Single constant active lane load -> load
358// * Dereferenceable address & few lanes -> scalarize speculative load/selects
359// * Adjacent vector addresses -> masked.load
360// * Narrow width by halfs excluding zero/undef lanes
361// * Vector incrementing address -> vector masked load
362Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
363 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(1));
364 if (!ConstMask)
365 return nullptr;
366
367 // Vector splat address w/known mask -> scalar load
368 // Fold the gather to load the source vector first lane
369 // because it is reloading the same value each time
370 if (ConstMask->isAllOnesValue())
371 if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
372 auto *VecTy = cast<VectorType>(II.getType());
373 const Align Alignment = II.getParamAlign(0).valueOrOne();
374 LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
375 Alignment, "load.scalar");
376 Value *Shuf =
377 Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
379 }
380
381 return nullptr;
382}
383
384// TODO, Obvious Missing Transforms:
385// * Single constant active lane -> store
386// * Adjacent vector addresses -> masked.store
387// * Narrow store width by halfs excluding zero/undef lanes
388// * Vector incrementing address -> vector masked store
389Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
390 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
391 if (!ConstMask)
392 return nullptr;
393
394 // If the mask is all zeros or poison, a scatter does nothing.
395 if (match(ConstMask, m_ZeroOrPoison()))
397
398 // Vector splat address -> scalar store
399 if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
400 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
401 if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
402 if (maskContainsAllOneOrUndef(ConstMask)) {
403 Align Alignment = II.getParamAlign(1).valueOrOne();
404 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
405 Alignment);
406 S->copyMetadata(II);
407 return S;
408 }
409 }
410 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
411 // lastlane), ptr
412 if (ConstMask->isAllOnesValue()) {
413 Align Alignment = II.getParamAlign(1).valueOrOne();
414 VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
415 ElementCount VF = WideLoadTy->getElementCount();
416 Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
417 Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
418 Value *Extract =
419 Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
420 StoreInst *S =
421 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
422 S->copyMetadata(II);
423 return S;
424 }
425 }
426 if (isa<ScalableVectorType>(ConstMask->getType()))
427 return nullptr;
428
429 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
430 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
431 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
432 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
433 PoisonElts))
434 return replaceOperand(II, 0, V);
435 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
436 PoisonElts))
437 return replaceOperand(II, 1, V);
438
439 return nullptr;
440}
441
442/// This function transforms launder.invariant.group and strip.invariant.group
443/// like:
444/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
445/// launder(strip(%x)) -> launder(%x)
446/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
447/// strip(launder(%x)) -> strip(%x)
448/// This is legal because it preserves the most recent information about
449/// the presence or absence of invariant.group.
451 InstCombinerImpl &IC) {
452 auto *Arg = II.getArgOperand(0);
453 auto *StrippedArg = Arg->stripPointerCasts();
454 auto *StrippedInvariantGroupsArg = StrippedArg;
455 while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
456 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
457 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
458 break;
459 StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
460 }
461 if (StrippedArg == StrippedInvariantGroupsArg)
462 return nullptr; // No launders/strips to remove.
463
464 Value *Result = nullptr;
465
466 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
467 Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
468 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
469 Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
470 else
472 "simplifyInvariantGroupIntrinsic only handles launder and strip");
473 if (Result->getType()->getPointerAddressSpace() !=
474 II.getType()->getPointerAddressSpace())
475 Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
476
477 return cast<Instruction>(Result);
478}
479
481 assert((II.getIntrinsicID() == Intrinsic::cttz ||
482 II.getIntrinsicID() == Intrinsic::ctlz) &&
483 "Expected cttz or ctlz intrinsic");
484 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
485 Value *Op0 = II.getArgOperand(0);
486 Value *Op1 = II.getArgOperand(1);
487 Value *X;
488 // ctlz(bitreverse(x)) -> cttz(x)
489 // cttz(bitreverse(x)) -> ctlz(x)
490 if (match(Op0, m_BitReverse(m_Value(X)))) {
491 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
492 Function *F =
493 Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
494 return CallInst::Create(F, {X, II.getArgOperand(1)});
495 }
496
497 if (II.getType()->isIntOrIntVectorTy(1)) {
498 // ctlz/cttz i1 Op0 --> not Op0
499 if (match(Op1, m_Zero()))
500 return BinaryOperator::CreateNot(Op0);
501 // If zero is poison, then the input can be assumed to be "true", so the
502 // instruction simplifies to "false".
503 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
504 return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
505 }
506
507 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
508 if (II.hasOneUse() && match(Op1, m_Zero()) &&
509 match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
510 II.dropUBImplyingAttrsAndMetadata();
511 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
512 }
513
514 Constant *C;
515
516 if (IsTZ) {
517 // cttz(-x) -> cttz(x)
518 if (match(Op0, m_Neg(m_Value(X))))
519 return IC.replaceOperand(II, 0, X);
520
521 // cttz(-x & x) -> cttz(x)
522 if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
523 return IC.replaceOperand(II, 0, X);
524
525 // cttz(sext(x)) -> cttz(zext(x))
526 if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
527 auto *Zext = IC.Builder.CreateZExt(X, II.getType());
528 auto *CttzZext =
529 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
530 return IC.replaceInstUsesWith(II, CttzZext);
531 }
532
533 // Zext doesn't change the number of trailing zeros, so narrow:
534 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
535 if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
536 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
537 IC.Builder.getTrue());
538 auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
539 return IC.replaceInstUsesWith(II, ZextCttz);
540 }
541
542 // cttz(abs(x)) -> cttz(x)
543 // cttz(nabs(x)) -> cttz(x)
544 Value *Y;
546 if (SPF == SPF_ABS || SPF == SPF_NABS)
547 return IC.replaceOperand(II, 0, X);
548
550 return IC.replaceOperand(II, 0, X);
551
552 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
553 if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
554 match(Op1, m_One())) {
555 Value *ConstCttz =
556 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
557 return BinaryOperator::CreateAdd(ConstCttz, X);
558 }
559
560 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
561 if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
562 match(Op1, m_One())) {
563 Value *ConstCttz =
564 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
565 return BinaryOperator::CreateSub(ConstCttz, X);
566 }
567
568 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
569 if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
570 Value *Width =
571 ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
572 return BinaryOperator::CreateSub(Width, X);
573 }
574 } else {
575 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
576 if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
577 match(Op1, m_One())) {
578 Value *ConstCtlz =
579 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
580 return BinaryOperator::CreateAdd(ConstCtlz, X);
581 }
582
583 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
584 if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
585 match(Op1, m_One())) {
586 Value *ConstCtlz =
587 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
588 return BinaryOperator::CreateSub(ConstCtlz, X);
589 }
590
591 // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
592 if (Op0->hasOneUse() &&
593 match(Op0,
595 Type *Ty = II.getType();
596 unsigned BitWidth = Ty->getScalarSizeInBits();
597 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
598 {X, IC.Builder.getFalse()});
599 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
600 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
601 }
602 }
603
604 // cttz(Pow2) -> Log2(Pow2)
605 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
606 if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
607 if (IsTZ)
608 return IC.replaceInstUsesWith(II, R);
609 BinaryOperator *BO = BinaryOperator::CreateSub(
610 ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
611 R);
612 BO->setHasNoSignedWrap();
614 return BO;
615 }
616
617 KnownBits Known = IC.computeKnownBits(Op0, &II);
618
619 // Create a mask for bits above (ctlz) or below (cttz) the first known one.
620 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
621 : Known.countMaxLeadingZeros();
622 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
623 : Known.countMinLeadingZeros();
624
625 // If all bits above (ctlz) or below (cttz) the first known one are known
626 // zero, this value is constant.
627 // FIXME: This should be in InstSimplify because we're replacing an
628 // instruction with a constant.
629 if (PossibleZeros == DefiniteZeros) {
630 auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
631 return IC.replaceInstUsesWith(II, C);
632 }
633
634 // If the input to cttz/ctlz is known to be non-zero,
635 // then change the 'ZeroIsPoison' parameter to 'true'
636 // because we know the zero behavior can't affect the result.
637 if (!Known.One.isZero() ||
639 if (!match(II.getArgOperand(1), m_One()))
640 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
641 }
642
643 // Add range attribute since known bits can't completely reflect what we know.
644 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
645 if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
646 !II.getMetadata(LLVMContext::MD_range)) {
647 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
648 APInt(BitWidth, PossibleZeros + 1));
649 II.addRangeRetAttr(Range);
650 return &II;
651 }
652
653 return nullptr;
654}
655
657 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
658 "Expected ctpop intrinsic");
659 Type *Ty = II.getType();
660 unsigned BitWidth = Ty->getScalarSizeInBits();
661 Value *Op0 = II.getArgOperand(0);
662 Value *X, *Y;
663
664 // ctpop(bitreverse(x)) -> ctpop(x)
665 // ctpop(bswap(x)) -> ctpop(x)
666 if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
667 return IC.replaceOperand(II, 0, X);
668
669 // ctpop(rot(x)) -> ctpop(x)
670 if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
671 match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
672 X == Y)
673 return IC.replaceOperand(II, 0, X);
674
675 // ctpop(x | -x) -> bitwidth - cttz(x, false)
676 if (Op0->hasOneUse() &&
677 match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
678 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
679 {X, IC.Builder.getFalse()});
680 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
681 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
682 }
683
684 // ctpop(~x & (x - 1)) -> cttz(x, false)
685 if (match(Op0,
687 Function *F =
688 Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
689 return CallInst::Create(F, {X, IC.Builder.getFalse()});
690 }
691
692 // Zext doesn't change the number of set bits, so narrow:
693 // ctpop (zext X) --> zext (ctpop X)
694 if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
695 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
696 return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
697 }
698
699 KnownBits Known(BitWidth);
700 IC.computeKnownBits(Op0, Known, &II);
701
702 // If all bits are zero except for exactly one fixed bit, then the result
703 // must be 0 or 1, and we can get that answer by shifting to LSB:
704 // ctpop (X & 32) --> (X & 32) >> 5
705 // TODO: Investigate removing this as its likely unnecessary given the below
706 // `isKnownToBeAPowerOfTwo` check.
707 if ((~Known.Zero).isPowerOf2())
708 return BinaryOperator::CreateLShr(
709 Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
710
711 // More generally we can also handle non-constant power of 2 patterns such as
712 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
713 // ctpop(Pow2OrZero) --> icmp ne X, 0
714 if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
715 return CastInst::Create(Instruction::ZExt,
718 Ty);
719
720 // Add range attribute since known bits can't completely reflect what we know.
721 if (BitWidth != 1) {
722 ConstantRange OldRange =
723 II.getRange().value_or(ConstantRange::getFull(BitWidth));
724
725 unsigned Lower = Known.countMinPopulation();
726 unsigned Upper = Known.countMaxPopulation() + 1;
727
728 if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
730 Lower = 1;
731
733 Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
734
735 if (Range != OldRange) {
736 II.addRangeRetAttr(Range);
737 return &II;
738 }
739 }
740
741 return nullptr;
742}
743
744/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
745/// at most two source operands are actually referenced.
747 bool IsExtension) {
748 // Bail out if the mask is not a constant.
749 auto *C = dyn_cast<Constant>(II.getArgOperand(II.arg_size() - 1));
750 if (!C)
751 return nullptr;
752
753 auto *RetTy = cast<FixedVectorType>(II.getType());
754 unsigned NumIndexes = RetTy->getNumElements();
755
756 // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
757 if (!RetTy->getElementType()->isIntegerTy(8) ||
758 (NumIndexes != 8 && NumIndexes != 16))
759 return nullptr;
760
761 // For tbx instructions, the first argument is the "fallback" vector, which
762 // has the same length as the mask and return type.
763 unsigned int StartIndex = (unsigned)IsExtension;
764 auto *SourceTy =
765 cast<FixedVectorType>(II.getArgOperand(StartIndex)->getType());
766 // Note that the element count of each source vector does *not* need to be the
767 // same as the element count of the return type and mask! All source vectors
768 // must have the same element count as each other, though.
769 unsigned NumElementsPerSource = SourceTy->getNumElements();
770
771 // There are no tbl/tbx intrinsics for which the destination size exceeds the
772 // source size. However, our definitions of the intrinsics, at least in
773 // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
774 // *could* technically happen.
775 if (NumIndexes > NumElementsPerSource)
776 return nullptr;
777
778 // The tbl/tbx intrinsics take several source operands followed by a mask
779 // operand.
780 unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;
781
782 // Map input operands to shuffle indices. This also helpfully deduplicates the
783 // input arguments, in case the same value is passed as an argument multiple
784 // times.
785 SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
786 Value *ShuffleOperands[2] = {PoisonValue::get(SourceTy),
787 PoisonValue::get(SourceTy)};
788
789 int Indexes[16];
790 for (unsigned I = 0; I < NumIndexes; ++I) {
791 Constant *COp = C->getAggregateElement(I);
792
793 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
794 return nullptr;
795
796 if (isa<UndefValue>(COp)) {
797 Indexes[I] = -1;
798 continue;
799 }
800
801 uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
802 // The index of the input argument that this index references (0 = first
803 // source argument, etc).
804 unsigned SourceOperandIndex = Index / NumElementsPerSource;
805 // The index of the element at that source operand.
806 unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
807
808 Value *SourceOperand;
809 if (SourceOperandIndex >= NumSourceOperands) {
810 // This index is out of bounds. Map it to index into either the fallback
811 // vector (tbx) or vector of zeroes (tbl).
812 SourceOperandIndex = NumSourceOperands;
813 if (IsExtension) {
814 // For out-of-bounds indices in tbx, choose the `I`th element of the
815 // fallback.
816 SourceOperand = II.getArgOperand(0);
817 SourceOperandElementIndex = I;
818 } else {
819 // Otherwise, choose some element from the dummy vector of zeroes (we'll
820 // always choose the first).
821 SourceOperand = Constant::getNullValue(SourceTy);
822 SourceOperandElementIndex = 0;
823 }
824 } else {
825 SourceOperand = II.getArgOperand(SourceOperandIndex + StartIndex);
826 }
827
828 // The source operand may be the fallback vector, which may not have the
829 // same number of elements as the source vector. In that case, we *could*
830 // choose to extend its length with another shufflevector, but it's simpler
831 // to just bail instead.
832 if (cast<FixedVectorType>(SourceOperand->getType())->getNumElements() !=
833 NumElementsPerSource)
834 return nullptr;
835
836 // We now know the source operand referenced by this index. Make it a
837 // shufflevector operand, if it isn't already.
838 unsigned NumSlots = ValueToShuffleSlot.size();
839 // This shuffle references more than two sources, and hence cannot be
840 // represented as a shufflevector.
841 if (NumSlots == 2 && !ValueToShuffleSlot.contains(SourceOperand))
842 return nullptr;
843
844 auto [It, Inserted] =
845 ValueToShuffleSlot.try_emplace(SourceOperand, NumSlots);
846 if (Inserted)
847 ShuffleOperands[It->getSecond()] = SourceOperand;
848
849 unsigned RemappedIndex =
850 (It->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
851 Indexes[I] = RemappedIndex;
852 }
853
855 ShuffleOperands[0], ShuffleOperands[1], ArrayRef(Indexes, NumIndexes));
856 return IC.replaceInstUsesWith(II, Shuf);
857}
858
859// Returns true iff the 2 intrinsics have the same operands, limiting the
860// comparison to the first NumOperands.
861static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
862 unsigned NumOperands) {
863 assert(I.arg_size() >= NumOperands && "Not enough operands");
864 assert(E.arg_size() >= NumOperands && "Not enough operands");
865 for (unsigned i = 0; i < NumOperands; i++)
866 if (I.getArgOperand(i) != E.getArgOperand(i))
867 return false;
868 return true;
869}
870
871// Remove trivially empty start/end intrinsic ranges, i.e. a start
872// immediately followed by an end (ignoring debuginfo or other
873// start/end intrinsics in between). As this handles only the most trivial
874// cases, tracking the nesting level is not needed:
875//
876// call @llvm.foo.start(i1 0)
877// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
878// call @llvm.foo.end(i1 0)
879// call @llvm.foo.end(i1 0) ; &I
//
// Returns true when a start with the same operands as the end intrinsic was
// found and the end intrinsic was erased; returns false when the backwards
// scan runs into anything else first or reaches the top of the block.
//
// NOTE(review): listing line 881 is absent from this extract; presumably it
// carries the function name and the leading parameters (the body uses `EndI`
// and `IC`) -- confirm against upstream.
880static bool
882 std::function<bool(const IntrinsicInst &)> IsStart) {
883 // We start from the end intrinsic and scan backwards, so that InstCombine
884 // has already processed (and potentially removed) all the instructions
885 // before the end intrinsic.
886 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
887 for (; BI != BE; ++BI) {
888 if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
  // Debug/pseudo instructions and other ends of the same kind are stepped over.
889 if (I->isDebugOrPseudoInst() ||
890 I->getIntrinsicID() == EndI.getIntrinsicID())
891 continue;
892 if (IsStart(*I)) {
893 if (haveSameOperands(EndI, *I, EndI.arg_size())) {
  // NOTE(review): listing line 894 is absent here; given the header comment
  // ("it will be removed") it presumably erases the matching start `*I` --
  // confirm against upstream.
895 IC.eraseInstFromFunction(EndI);
896 return true;
897 }
898 // Skip start intrinsics that don't pair with this end intrinsic.
899 continue;
900 }
901 }
  // Any other instruction terminates the scan: the range is not trivially
  // empty.
902 break;
903 }
904
905 return false;
906}
907
// NOTE(review): listing line 908 (the function header, which declares `I`) is
// absent from this extract.
//
// The predicate passed below decides which intrinsics count as the "start" of
// the range: a vastart always does; a vacopy does only when its operand 1 is
// not operand 0 of `I`.
909 removeTriviallyEmptyRange(I, *this, [&I](const IntrinsicInst &II) {
910 // Bail out on the case where the source va_list of a va_copy is destroyed
911 // immediately by a follow-up va_end.
912 return II.getIntrinsicID() == Intrinsic::vastart ||
913 (II.getIntrinsicID() == Intrinsic::vacopy &&
914 I.getArgOperand(0) != II.getArgOperand(1));
915 });
  // The helper's boolean result is not used; nullptr is returned either way.
916 return nullptr;
917}
918
// NOTE(review): listing line 919 (the function header, which declares `Call`)
// is absent from this extract.
//
// Swaps the first two call arguments when argument 0 is a Constant and
// argument 1 is not, so that the constant ends up in position 1. Returns
// &Call when the swap was performed and nullptr when nothing changed.
920 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
921 Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
922 if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
923 Call.setArgOperand(0, Arg1);
924 Call.setArgOperand(1, Arg0);
925 return &Call;
926 }
927 return nullptr;
928}
929
930/// Creates a result tuple for an overflow intrinsic \p II with a given
931/// \p Result and a constant \p Overflow value.
// NOTE(review): listing line 932 is absent from this extract; presumably it
// holds the return type, the function name and the `II` / `Result`
// parameters used below -- confirm against upstream.
933 Constant *Overflow) {
  // Start from the constant struct {poison, Overflow} ...
934 Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
935 StructType *ST = cast<StructType>(II->getType());
936 Constant *Struct = ConstantStruct::get(ST, V);
  // ... and fill element 0 with the (possibly non-constant) Result.
937 return InsertValueInst::Create(Struct, Result, 0);
938}
939
// Tries to fold a with-overflow intrinsic in two ways: first through
// OptimizeOverflowCheck, then by looking for an assume stating that the
// extracted overflow bit is false. Returns the replacement tuple instruction,
// or nullptr when neither approach applies.
//
// NOTE(review): listing line 940 is absent from this extract; presumably it
// holds this definition's return type -- confirm against upstream.
941InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
942 WithOverflowInst *WO = cast<WithOverflowInst>(II);
943 Value *OperationResult = nullptr;
944 Constant *OverflowResult = nullptr;
945 if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
946 WO->getRHS(), *WO, OperationResult, OverflowResult))
947 return createOverflowTuple(WO, OperationResult, OverflowResult);
948
949 // See whether we can optimize the overflow check with assumption information.
950 for (User *U : WO->users()) {
  // Only users that extract element 1 (the overflow bit) are of interest.
951 if (!match(U, m_ExtractValue<1>(m_Value())))
952 continue;
953
954 for (auto &AssumeVH : AC.assumptionsFor(U)) {
955 if (!AssumeVH)
956 continue;
957 CallInst *I = cast<CallInst>(AssumeVH);
  // The assume has to be on `not U`, i.e. on the negated overflow bit.
958 if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
959 continue;
960 if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
961 /*AllowEphemerals=*/true))
962 continue;
  // Replace the intrinsic by the plain binary operation, tagged with the
  // no-wrap flag that matches the intrinsic's signedness, and a constant
  // false overflow bit.
963 Value *Result =
964 Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
965 Result->takeName(WO);
966 if (auto *Inst = dyn_cast<Instruction>(Result)) {
967 if (WO->isSigned())
968 Inst->setHasNoSignedWrap();
969 else
970 Inst->setHasNoUnsignedWrap();
971 }
972 return createOverflowTuple(WO, Result,
973 ConstantInt::getFalse(U->getType()));
974 }
975 }
976
977 return nullptr;
978}
979
980static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
981 Ty = Ty->getScalarType();
982 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
983}
984
985static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
986 Ty = Ty->getScalarType();
987 return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
988}
989
990/// \returns the compare predicate type if the test performed by
991/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
992/// floating-point environment assumed for \p F for type \p Ty
// NOTE(review): listing line 993 is absent from this extract; presumably it
// holds the return type, the function name and the `Mask` parameter --
// confirm against upstream.
994 const Function &F, Type *Ty) {
  // Every arm is valid for exactly one input-denormal mode, which is checked
  // by the helper called inside it; when the mode does not match, the arm
  // breaks out of the switch without choosing a predicate.
995 switch (static_cast<unsigned>(Mask)) {
996 case fcZero:
997 if (inputDenormalIsIEEE(F, Ty))
998 return FCmpInst::FCMP_OEQ;
999 break;
1000 case fcZero | fcSubnormal:
1001 if (inputDenormalIsDAZ(F, Ty))
1002 return FCmpInst::FCMP_OEQ;
1003 break;
1004 case fcPositive | fcNegZero:
1005 if (inputDenormalIsIEEE(F, Ty))
1006 return FCmpInst::FCMP_OGE;
1007 break;
  // NOTE(review): listing line 1008 (presumably this arm's `case` label) is
  // absent from this extract.
1009 if (inputDenormalIsDAZ(F, Ty))
1010 return FCmpInst::FCMP_OGE;
1011 break;
  // NOTE(review): listing line 1012 (presumably this arm's `case` label) is
  // absent from this extract.
1013 if (inputDenormalIsIEEE(F, Ty))
1014 return FCmpInst::FCMP_OGT;
1015 break;
1016 case fcNegative | fcPosZero:
1017 if (inputDenormalIsIEEE(F, Ty))
1018 return FCmpInst::FCMP_OLE;
1019 break;
  // NOTE(review): listing line 1020 (presumably this arm's `case` label) is
  // absent from this extract.
1021 if (inputDenormalIsDAZ(F, Ty))
1022 return FCmpInst::FCMP_OLE;
1023 break;
  // NOTE(review): listing line 1024 (presumably this arm's `case` label) is
  // absent from this extract.
1025 if (inputDenormalIsIEEE(F, Ty))
1026 return FCmpInst::FCMP_OLT;
1027 break;
1028 case fcPosNormal | fcPosInf:
1029 if (inputDenormalIsDAZ(F, Ty))
1030 return FCmpInst::FCMP_OGT;
1031 break;
1032 case fcNegNormal | fcNegInf:
1033 if (inputDenormalIsDAZ(F, Ty))
1034 return FCmpInst::FCMP_OLT;
1035 break;
1036 case ~fcZero & ~fcNan:
1037 if (inputDenormalIsIEEE(F, Ty))
1038 return FCmpInst::FCMP_ONE;
1039 break;
1040 case ~(fcZero | fcSubnormal) & ~fcNan:
1041 if (inputDenormalIsDAZ(F, Ty))
1042 return FCmpInst::FCMP_ONE;
1043 break;
1044 default:
1045 break;
1046 }
1047
  // NOTE(review): listing line 1048 is absent from this extract; presumably
  // it is the fall-through return for masks with no fcmp-with-zero
  // equivalent -- confirm against upstream.
1049}
1050
// Tries a sequence of rewrites on the class-test call II, whose operand 0 is
// the tested value and whose operand 1 is the constant class mask. Returns the
// result of the first rewrite that applies, or nullptr when none does.
1051Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
1052 Value *Src0 = II.getArgOperand(0);
1053 Value *Src1 = II.getArgOperand(1);
1054 const ConstantInt *CMask = cast<ConstantInt>(Src1);
1055 FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
  // IsUnordered: every fcNan bit is requested. IsOrdered: none of them is.
  // A mask with only some of the fcNan bits set is neither, and is excluded
  // from the fcmp rewrites below that require (IsOrdered || IsUnordered).
1056 const bool IsUnordered = (Mask & fcNan) == fcNan;
1057 const bool IsOrdered = (Mask & fcNan) == fcNone;
1058 const FPClassTest OrderedMask = Mask & ~fcNan;
1059 const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
1060
  // All rewrites into fcmp below are skipped when the enclosing function has
  // the StrictFP attribute.
1061 const bool IsStrict =
1062 II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
1063
1064 Value *FNegSrc;
1065 if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
1066 // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
1067
1068 II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
1069 return replaceOperand(II, 0, FNegSrc);
1070 }
1071
1072 Value *FAbsSrc;
1073 if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
  // Same idea as the fneg case: test the fabs source directly with the mask
  // transformed by inverse_fabs.
1074 II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
1075 return replaceOperand(II, 0, FAbsSrc);
1076 }
1077
1078 if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
1079 (IsOrdered || IsUnordered) && !IsStrict) {
1080 // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
1081 // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
1082 // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
1083 // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
  // NOTE(review): listing line 1084 is absent from this extract; presumably
  // it declares the `Inf` constant used in the compare below.
1085 FCmpInst::Predicate Pred =
1086 IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
1087 if (OrderedInvertedMask == fcInf)
1088 Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
1089
1090 Value *Fabs = Builder.CreateFAbs(Src0);
1091 Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
1092 CmpInf->takeName(&II);
1093 return replaceInstUsesWith(II, CmpInf);
1094 }
1095
1096 if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
1097 (IsOrdered || IsUnordered) && !IsStrict) {
1098 // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1099 // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1100 // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
1101 // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
1102 Constant *Inf =
1103 ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
1104 Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
1105 : Builder.CreateFCmpOEQ(Src0, Inf);
1106
1107 EqInf->takeName(&II);
1108 return replaceInstUsesWith(II, EqInf);
1109 }
1110
1111 if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
1112 (IsOrdered || IsUnordered) && !IsStrict) {
1113 // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
1114 // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
1115 // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
1116 // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
  // NOTE(review): listing line 1117 is absent from this extract; the next
  // line is the tail of a call that presumably initialises `Inf`.
1118 OrderedInvertedMask == fcNegInf);
1119 Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
1120 : Builder.CreateFCmpONE(Src0, Inf);
1121 NeInf->takeName(&II);
1122 return replaceInstUsesWith(II, NeInf);
1123 }
1124
1125 if (Mask == fcNan && !IsStrict) {
1126 // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1127 // exceptions.
1128 Value *IsNan =
1129 Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
1130 IsNan->takeName(&II);
1131 return replaceInstUsesWith(II, IsNan);
1132 }
1133
1134 if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1135 // Equivalent of !isnan. Replace with standard fcmp.
1136 Value *FCmp =
1137 Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
1138 FCmp->takeName(&II);
1139 return replaceInstUsesWith(II, FCmp);
1140 }
1141
  // NOTE(review): listing line 1142 is absent from this extract; presumably
  // it declares `PredType`, which is assigned inside the condition below.
1143
1144 // Try to replace with an fcmp with 0
1145 //
1146 // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1147 // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
1148 // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1149 // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1150 //
1151 // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
1152 // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
1153 //
1154 // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
1155 // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
1156 //
1157 if (!IsStrict && (IsOrdered || IsUnordered) &&
1158 (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
1159 Src0->getType())) !=
  // NOTE(review): listing lines 1160-1161 are absent from this extract;
  // presumably they hold the right-hand side of the `!=` that closes the
  // condition and the declaration of `Zero` used below.
1162 // Equivalent of == 0.
1163 Value *FCmp = Builder.CreateFCmp(
1164 IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
1165 Src0, Zero);
1166
1167 FCmp->takeName(&II);
1168 return replaceInstUsesWith(II, FCmp);
1169 }
1170
1171 KnownFPClass Known =
1172 computeKnownFPClass(Src0, Mask, SQ.getWithInstruction(&II));
1173
1174 // Clear test bits we know must be false from the source value.
1175 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
1176 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
1177 if ((Mask & Known.KnownFPClasses) != Mask) {
1178 II.setArgOperand(
1179 1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
  // II was modified in place, so it is returned itself.
1180 return &II;
1181 }
1182
1183 // If none of the tests which can return false are possible, fold to true.
1184 // fp_class (nnan x), ~(qnan|snan) -> true
1185 // fp_class (ninf x), ~(ninf|pinf) -> true
1186 if (Mask == Known.KnownFPClasses)
1187 return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
1188
1189 return nullptr;
1190}
1191
1192static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1193 KnownBits Known = computeKnownBits(Op, SQ);
1194 if (Known.isNonNegative())
1195 return false;
1196 if (Known.isNegative())
1197 return true;
1198
1199 Value *X, *Y;
1200 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1202
1203 return std::nullopt;
1204}
1205
1206static std::optional<bool> getKnownSignOrZero(Value *Op,
1207 const SimplifyQuery &SQ) {
1208 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1209 return Sign;
1210
1211 Value *X, *Y;
1212 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1214
1215 return std::nullopt;
1216}
1217
1218/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1219static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1220 const SimplifyQuery &SQ) {
1221 std::optional<bool> Known1 = getKnownSign(Op1, SQ);
1222 if (!Known1)
1223 return false;
1224 std::optional<bool> Known0 = getKnownSign(Op0, SQ);
1225 if (!Known0)
1226 return false;
1227 return *Known0 == *Known1;
1228}
1229
1230// Determines if ldexp(ldexp(x, a), b) -> ldexp(x, sadd.sat(a, b)) is safe.
1231//
1232// This is true if, when the add saturates, the resulting ldexp is guaranteed to
1233// produce 0 or inf.
1234static bool ldexpSaturatingAddIsSafe(Type *FpTy, Type *ExpTy) {
1235 const fltSemantics &FltSem = FpTy->getScalarType()->getFltSemantics();
  // Formats without an infinity can never satisfy the "produces inf" half.
1236 if (!APFloat::semanticsHasInf(FltSem))
1237 return false;
1238
1239 // Cap ExpBits at 32 because scalbn takes an int. This is sufficient for any
1240 // reasonable fp type (for example, `double` only has 11 exponent bits).
1241 unsigned ExpBits = std::min(ExpTy->getScalarSizeInBits(), 32u);
1242 int SignedMax = static_cast<int>(maxIntN(ExpBits));
1243 int SignedMin = static_cast<int>(minIntN(ExpBits));
  // Scale the smallest value by the largest exponent and the largest value by
  // the smallest exponent: these are the two extreme cases.
  // NOTE(review): listing lines 1245 and 1247 are absent from this extract;
  // presumably they hold the remaining argument(s) that close the two scalbn
  // calls below -- confirm against upstream.
1244 APFloat ScaledUp = scalbn(APFloat::getSmallest(FltSem), SignedMax,
1246 APFloat ScaledDown = scalbn(APFloat::getLargest(FltSem), SignedMin,
1248 return ScaledUp.isInfinity() && ScaledDown.isZero();
1249}
1250
1251/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1252/// can trigger other combines.
// NOTE(review): listing line 1253 is absent from this extract; presumably it
// holds the return type, the function name and the `II` parameter used below
// -- confirm against upstream.
1254 InstCombiner::BuilderTy &Builder) {
1255 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1256 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
1257 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
1258 "Expected a min or max intrinsic");
1259
1260 // TODO: Match vectors with undef elements, but undef may not propagate.
1261 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1262 Value *X;
1263 const APInt *C0, *C1;
1264 if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
1265 !match(Op1, m_APInt(C1)))
1266 return nullptr;
1267
1268 // Check for necessary no-wrap and overflow constraints.
  // Signed min/max needs an nsw add; unsigned min/max needs an nuw add.
1269 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
1270 auto *Add = cast<BinaryOperator>(Op0);
1271 if ((IsSigned && !Add->hasNoSignedWrap()) ||
1272 (!IsSigned && !Add->hasNoUnsignedWrap()))
1273 return nullptr;
1274
1275 // If the constant difference overflows, then instsimplify should reduce the
1276 // min/max to the add or C1.
1277 bool Overflow;
1278 APInt CDiff =
1279 IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
1280 assert(!Overflow && "Expected simplify of min/max");
1281
1282 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1283 // Note: the "mismatched" no-overflow setting does not propagate.
1284 Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
1285 Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
  // The rebuilt add reuses the original add's constant operand and carries
  // only the flag matching the min/max signedness.
1286 return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
1287 : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
1288}
1289/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1290Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1291 Type *Ty = MinMax1.getType();
1292
1293 // We are looking for a tree of:
1294 // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1295 // Where the min and max could be reversed
1296 Instruction *MinMax2;
1297 BinaryOperator *AddSub;
1298 const APInt *MinValue, *MaxValue;
1299 if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
1300 if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
1301 return nullptr;
1302 } else if (match(&MinMax1,
1303 m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
1304 if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
1305 return nullptr;
1306 } else
1307 return nullptr;
1308
1309 // Check that the constants clamp a saturate, and that the new type would be
1310 // sensible to convert to.
1311 if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
1312 return nullptr;
1313 // In what bitwidth can this be treated as saturating arithmetics?
1314 unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
1315 // FIXME: This isn't quite right for vectors, but using the scalar type is a
1316 // good first approximation for what should be done there.
1317 if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
1318 return nullptr;
1319
1320 // Also make sure that the inner min/max and the add/sub have one use.
1321 if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
1322 return nullptr;
1323
1324 // Create the new type (which can be a vector type)
1325 Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1326
1327 Intrinsic::ID IntrinsicID;
1328 if (AddSub->getOpcode() == Instruction::Add)
1329 IntrinsicID = Intrinsic::sadd_sat;
1330 else if (AddSub->getOpcode() == Instruction::Sub)
1331 IntrinsicID = Intrinsic::ssub_sat;
1332 else
1333 return nullptr;
1334
1335 // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1336 // is usually achieved via a sext from a smaller type.
1337 if (ComputeMaxSignificantBits(AddSub->getOperand(0), AddSub) > NewBitWidth ||
1338 ComputeMaxSignificantBits(AddSub->getOperand(1), AddSub) > NewBitWidth)
1339 return nullptr;
1340
1341 // Finally create and return the sat intrinsic, truncated to the new type
1342 Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
1343 Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
1344 Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
1345 return CastInst::Create(Instruction::SExt, Sat, Ty);
1346}
1347
1348
1349/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1350/// can only be one of two possible constant values -- turn that into a select
1351/// of constants.
// NOTE(review): listing line 1352 is absent from this extract; presumably it
// holds the return type, the function name and the `II` parameter used below
// -- confirm against upstream.
1353 InstCombiner::BuilderTy &Builder) {
1354 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1355 Value *X;
1356 const APInt *C0, *C1;
1357 if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
1358 return nullptr;
1359
  // NOTE(review): listing line 1360 is absent from this extract; presumably
  // it declares `Pred` with the BAD_ICMP_PREDICATE value that is tested after
  // the switch.
  // Each arm accepts only the opposite min/max as the inner operation, with
  // an inner constant exactly one away from the outer constant.
1361 switch (II->getIntrinsicID()) {
1362 case Intrinsic::smax:
1363 if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1364 Pred = ICmpInst::ICMP_SGT;
1365 break;
1366 case Intrinsic::smin:
1367 if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1368 Pred = ICmpInst::ICMP_SLT;
1369 break;
1370 case Intrinsic::umax:
1371 if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1372 Pred = ICmpInst::ICMP_UGT;
1373 break;
1374 case Intrinsic::umin:
1375 if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1376 Pred = ICmpInst::ICMP_ULT;
1377 break;
1378 default:
1379 llvm_unreachable("Expected min/max intrinsic");
1380 }
1381 if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1382 return nullptr;
1383
1384 // max (min X, 42), 41 --> X > 41 ? 42 : 41
1385 // min (max X, 42), 43 --> X < 43 ? 42 : 43
1386 Value *Cmp = Builder.CreateICmp(Pred, X, I1);
1387 return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
1388}
1389
1390/// If this min/max has a constant operand and an operand that is a matching
1391/// min/max with a constant operand, constant-fold the 2 constant operands.
// NOTE(review): listing line 1392 is absent from this extract; presumably it
// holds the return type, the function name and the `II` parameter used below
// -- confirm against upstream.
1393 IRBuilderBase &Builder,
1394 const SimplifyQuery &SQ) {
1395 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1396 auto *LHS = dyn_cast<MinMaxIntrinsic>(II->getArgOperand(0));
1397 if (!LHS)
1398 return nullptr;
1399
1400 Constant *C0, *C1;
1401 if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
1402 !match(II->getArgOperand(1), m_ImmConstant(C1)))
1403 return nullptr;
1404
1405 // max (max X, C0), C1 --> max X, (max C0, C1)
1406 // min (min X, C0), C1 --> min X, (min C0, C1)
1407 // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1408 // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
  // Mismatched inner/outer intrinsics are accepted only for the two mixed
  // pairs listed above, and only when both constants are known non-negative.
1409 Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
1410 if (InnerMinMaxID != MinMaxID &&
1411 !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
1412 (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1413 isKnownNonNegative(C0, SQ) && isKnownNonNegative(C1, SQ)))
1414 return nullptr;
1415
  // NOTE(review): listing line 1416 is absent from this extract; presumably
  // it declares the `Pred` used in the compare below.
  // The merged constant is built as select(icmp Pred C0, C1), C0, C1 and the
  // result keeps the inner intrinsic's ID.
1417 Value *CondC = Builder.CreateICmp(Pred, C0, C1);
1418 Value *NewC = Builder.CreateSelect(CondC, C0, C1);
1419 return Builder.CreateIntrinsic(InnerMinMaxID, II->getType(),
1420 {LHS->getArgOperand(0), NewC});
1421}
1422
1423/// If this min/max has a matching min/max operand with a constant, try to push
1424/// the constant operand into this instruction. This can enable more folds.
// NOTE(review): several listing lines of this definition are absent from this
// extract (1426, 1432, 1434, 1442 and 1446); each gap is marked below. Confirm
// the missing text against upstream before editing this function.
1425static Instruction *
// NOTE(review): listing line 1426 absent -- presumably the function name and
// the `II` parameter.
1427 InstCombiner::BuilderTy &Builder) {
1428 // Match and capture a min/max operand candidate.
1429 Value *X, *Y;
1430 Constant *C;
1431 Instruction *Inner;
  // NOTE(review): listing lines 1432 and 1434 absent -- presumably the start
  // and the middle of the match expression that binds Inner, X, C and Y.
1433 m_Instruction(Inner),
1435 m_Value(Y))))
1436 return nullptr;
1437
1438 // The inner op must match. Check for constants to avoid infinite loops.
1439 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1440 auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
  // NOTE(review): listing line 1442 absent -- presumably the remainder of
  // this condition (the constant check mentioned in the comment above).
1441 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
1443 return nullptr;
1444
1445 // max (max X, C), Y --> max (max X, Y), C
  // NOTE(review): listing line 1446 absent -- presumably the start of the
  // statement that defines `MinMax`, the callee used in the return below.
1447 MinMaxID, II->getType());
1448 Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
1449 NewInner->takeName(Inner);
1450 return CallInst::Create(MinMax, {NewInner, C});
1451}
1452
1453/// Reduce a sequence of min/max intrinsics with a common operand.
// Returns a new (not yet inserted) call that reuses one of the two inner
// min/max calls together with the remaining third operand, or nullptr when
// the pattern does not apply.
// NOTE(review): listing line 1454 is absent from this extract; presumably it
// holds the return type, the function name and the `II` parameter used below
// -- confirm against upstream.
1455 // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1456 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1457 auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
1458 Intrinsic::ID MinMaxID = II->getIntrinsicID();
  // Both operands must be the same kind of min/max as II, and at least one of
  // them must have a single use.
1459 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1460 RHS->getIntrinsicID() != MinMaxID ||
1461 (!LHS->hasOneUse() && !RHS->hasOneUse()))
1462 return nullptr;
1463
1464 Value *A = LHS->getArgOperand(0);
1465 Value *B = LHS->getArgOperand(1);
1466 Value *C = RHS->getArgOperand(0);
1467 Value *D = RHS->getArgOperand(1);
1468
1469 // Look for a common operand.
1470 Value *MinMaxOp = nullptr;
1471 Value *ThirdOp = nullptr;
1472 if (LHS->hasOneUse()) {
1473 // If the LHS is only used in this chain and the RHS is used outside of it,
1474 // reuse the RHS min/max because that will eliminate the LHS.
1475 if (D == A || C == A) {
1476 // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1477 // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1478 MinMaxOp = RHS;
1479 ThirdOp = B;
1480 } else if (D == B || C == B) {
1481 // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1482 // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1483 MinMaxOp = RHS;
1484 ThirdOp = A;
1485 }
1486 } else {
1487 assert(RHS->hasOneUse() && "Expected one-use operand");
1488 // Reuse the LHS. This will eliminate the RHS.
1489 if (D == A || D == B) {
1490 // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1491 // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1492 MinMaxOp = LHS;
1493 ThirdOp = C;
1494 } else if (C == A || C == B) {
1495 // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1496 // min(min(a, b), min(a, d)) --> min(min(a, b), d)
1497 MinMaxOp = LHS;
1498 ThirdOp = D;
1499 }
1500 }
1501
1502 if (!MinMaxOp || !ThirdOp)
1503 return nullptr;
1504
1505 Module *Mod = II->getModule();
1506 Function *MinMax =
1507 Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
1508 return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1509}
1510
1511/// If all arguments of the intrinsic are unary shuffles with the same mask,
1512/// try to shuffle after the intrinsic.
// NOTE(review): listing lines 1513-1514 are absent from this extract;
// presumably they hold the function header that declares `II` -- confirm
// against upstream.
1515 if (!II->getType()->isVectorTy() ||
1516 !isTriviallyVectorizable(II->getIntrinsicID()) ||
1517 !II->getCalledFunction()->isSpeculatable())
1518 return nullptr;
1519
1520 Value *X;
1521 Constant *C;
1522 ArrayRef<int> Mask;
  // Pick the first argument that is neither a Constant nor a scalar operand of
  // the intrinsic; it supplies the reference Mask and source vector type.
  // NOTE(review): find_if_not yields an iterator, so the `!NonConstArg` test
  // below does not look like it can detect "no such argument" -- confirm
  // whether a comparison with the end of II->args() is intended.
1523 auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1524 return isa<Constant>(Arg.get()) ||
1525 isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1526 Arg.getOperandNo(), nullptr);
1527 });
1528 if (!NonConstArg ||
1529 !match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
1530 return nullptr;
1531
1532 // At least 1 operand must be a shuffle with 1 use because we are creating 2
1533 // instructions.
1534 if (none_of(II->args(), match_fn(m_OneUse(m_Shuffle(m_Value(), m_Value())))))
1535 return nullptr;
1536
1537 // See if all arguments are shuffled with the same mask.
  // NOTE(review): listing line 1538 is absent from this extract; presumably
  // it declares the `NewArgs` container filled by the loop below.
1539 Type *SrcTy = X->getType();
1540 for (Use &Arg : II->args()) {
  // Scalar operands pass through unchanged, shuffles with the same mask and
  // source type contribute their source, and immediate constants are
  // replaced by their un-shuffled form when one can be found.
1541 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1542 Arg.getOperandNo(), nullptr))
1543 NewArgs.push_back(Arg);
1544 else if (match(&Arg,
1545 m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1546 X->getType() == SrcTy)
1547 NewArgs.push_back(X);
1548 else if (match(&Arg, m_ImmConstant(C))) {
1549 // If it's a constant, try find the constant that would be shuffled to C.
1550 if (Constant *ShuffledC =
1551 unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
1552 NewArgs.push_back(ShuffledC);
1553 else
1554 return nullptr;
1555 } else
1556 return nullptr;
1557 }
1558
1559 // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
  // FPI is passed to CreateIntrinsic only when II is a floating-point math
  // operation; otherwise nullptr is passed.
1560 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1561 // Result type might be a different vector width.
1562 // TODO: Check that the result type isn't widened?
1563 VectorType *ResTy =
1564 VectorType::get(II->getType()->getScalarType(), cast<VectorType>(SrcTy));
1565 Value *NewIntrinsic =
1566 Builder.CreateIntrinsic(ResTy, II->getIntrinsicID(), NewArgs, FPI);
1567 return new ShuffleVectorInst(NewIntrinsic, Mask);
1568}
1569
1570/// If all arguments of the intrinsic are reverses, try to pull the reverse
1571/// after the intrinsic.
// NOTE(review): listing line 1572 is absent from this extract; presumably it
// holds the function header that declares `II` -- confirm against upstream.
1573 if (!II->getType()->isVectorTy() ||
1574 !isTriviallyVectorizable(II->getIntrinsicID()))
1575 return nullptr;
1576
1577 // At least 1 operand must be a reverse with 1 use because we are creating 2
1578 // instructions.
1579 if (none_of(II->args(), [](Value *V) {
1580 return match(V, m_OneUse(m_VecReverse(m_Value())));
1581 }))
1582 return nullptr;
1583
1584 Value *X;
1585 Constant *C;
1586 SmallVector<Value *> NewArgs;
1587 for (Use &Arg : II->args()) {
  // Scalar operands and splats are kept as they are, reversed operands
  // contribute their source, and other immediate constants are reversed
  // through the builder. Anything else aborts the fold.
1588 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1589 Arg.getOperandNo(), nullptr))
1590 NewArgs.push_back(Arg);
1591 else if (match(&Arg, m_VecReverse(m_Value(X))))
1592 NewArgs.push_back(X);
1593 else if (isSplatValue(Arg))
1594 NewArgs.push_back(Arg);
1595 else if (match(&Arg, m_ImmConstant(C)))
1596 NewArgs.push_back(Builder.CreateVectorReverse(C));
1597 else
1598 return nullptr;
1599 }
1600
1601 // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
1602 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1603 Instruction *NewIntrinsic = Builder.CreateIntrinsic(
1604 II->getType(), II->getIntrinsicID(), NewArgs, FPI);
1605 return Builder.CreateVectorReverse(NewIntrinsic);
1606}
1607
1608/// Fold the following cases and accepts bswap and bitreverse intrinsics:
1609/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1610/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
// NOTE(review): several listing lines of this definition are absent from this
// extract (1612, 1620, 1621 and 1623); each gap is marked below. Confirm the
// missing text against upstream before editing this function.
1611template <Intrinsic::ID IntrID>
// NOTE(review): listing line 1612 absent -- presumably the return type, the
// function name and its first parameter.
1613 InstCombiner::BuilderTy &Builder) {
1614 static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
1615 "This helper only supports BSWAP and BITREVERSE intrinsics");
1616
1617 Value *X, *Y;
1618 // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1619 // don't match ConstantExpr that aren't meaningful for this transform.
  // NOTE(review): listing lines 1620-1621 absent -- presumably the `if` that
  // matches the logic op (binding X and Y) and is closed by the brace just
  // before the final return, plus the start of its body.
1622 Value *OldReorderX, *OldReorderY;
  // NOTE(review): listing line 1623 absent -- presumably the definition of
  // `Op`, the opcode passed to BinaryOperator::Create below.
1624
1625 // If both X and Y are bswap/bitreverse, the transform reduces the number
1626 // of instructions even if there's multiuse.
1627 // If only one operand is bswap/bitreverse, we need to ensure the operand
1628 // have only one use.
1629 if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
1630 match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
1631 return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
1632 }
1633
  // Only X is a (single-use) reorder: apply the reorder to Y instead.
1634 if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
1635 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
1636 return BinaryOperator::Create(Op, OldReorderX, NewReorder);
1637 }
1638
  // Only Y is a (single-use) reorder: apply the reorder to X instead.
1639 if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
1640 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
1641 return BinaryOperator::Create(Op, NewReorder, OldReorderY);
1642 }
1643 }
1644 return nullptr;
1645}
1646
1647/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1648/// `f(f(x, y), y) == f(x, y)` holds.
// Returns true for the integer and floating-point min/max intrinsic IDs
// listed in the switch, and false for every other ID.
// NOTE(review): listing line 1649 is absent from this extract; presumably it
// holds the function header that declares `IID` -- confirm against upstream.
1650 switch (IID) {
1651 case Intrinsic::smax:
1652 case Intrinsic::smin:
1653 case Intrinsic::umax:
1654 case Intrinsic::umin:
1655 case Intrinsic::maximum:
1656 case Intrinsic::minimum:
1657 case Intrinsic::maximumnum:
1658 case Intrinsic::minimumnum:
1659 case Intrinsic::maxnum:
1660 case Intrinsic::minnum:
1661 return true;
1662 default:
1663 return false;
1664 }
1665}
1666
1667/// Attempt to simplify value-accumulating recurrences of kind:
1668/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1669/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1670/// And let the idempotent binary intrinsic be hoisted, when the operands are
1671/// known to be loop-invariant.
// NOTE(review): listing line 1672 is absent from this extract; presumably it
// holds the return type, the function name and the `IC` parameter used below
// -- confirm against upstream.
1673 IntrinsicInst *II) {
1674 PHINode *PN;
1675 Value *Init, *OtherOp;
1676
1677 // A binary intrinsic recurrence with loop-invariant operands is equivalent to
1678 // `call @llvm.binary.intrinsic(Init, OtherOp)`.
1679 auto IID = II->getIntrinsicID();
  // NOTE(review): listing line 1681 is absent from this extract; presumably
  // it is the part of this condition that binds PN, Init and OtherOp. The
  // visible remainder requires OtherOp to dominate the phi.
1680 if (!isIdempotentBinaryIntrinsic(IID) ||
1682 !IC.getDominatorTree().dominates(OtherOp, PN))
1683 return nullptr;
1684
1685 auto *InvariantBinaryInst =
1686 IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
  // Fast-math flags are copied from II when the new call is an FP operation.
1687 if (isa<FPMathOperator>(InvariantBinaryInst))
1688 cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
1689 return InvariantBinaryInst;
1690}
1691
1692static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1693 if (!CanReorderLanes)
1694 return nullptr;
1695
1696 Value *V;
1697 if (match(Arg, m_VecReverse(m_Value(V))))
1698 return V;
1699
1700 ArrayRef<int> Mask;
1701 if (!isa<FixedVectorType>(Arg->getType()) ||
1702 !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
1703 !cast<ShuffleVectorInst>(Arg)->isSingleSource())
1704 return nullptr;
1705
1706 int Sz = Mask.size();
1707 SmallBitVector UsedIndices(Sz);
1708 for (int Idx : Mask) {
1709 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1710 return nullptr;
1711 UsedIndices.set(Idx);
1712 }
1713
1714 // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
1715 // other changes.
1716 return UsedIndices.all() ? V : nullptr;
1717}
1718
1719/// Fold an unsigned minimum of trailing or leading zero bits counts:
1720/// umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
1721/// umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
1722/// >> ConstOp))
1723/// umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 | CtOp2)
1724/// umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 | CtOp2)
1725template <Intrinsic::ID IntrID>
1726static Value *
// NOTE(review): listing line 1727 is absent from this extract; presumably it
// holds the function name and the `I0` / `I1` parameters used below --
// confirm against upstream.
1728 const DataLayout &DL,
1729 InstCombiner::BuilderTy &Builder) {
1730 static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1731 "This helper only supports cttz and ctlz intrinsics");
1732
1733 Value *CtOp1, *CtOp2;
1734 Value *ZeroUndef1, *ZeroUndef2;
  // I0 must be a single-use count intrinsic of the requested kind.
1735 if (!match(I0, m_OneUse(
1736 m_Intrinsic<IntrID>(m_Value(CtOp1), m_Value(ZeroUndef1)))))
1737 return nullptr;
1738
  // Two counts: count the OR of both sources; the second operand of the new
  // call is the OR of the two original flag operands.
1739 if (match(I1,
1740 m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp2), m_Value(ZeroUndef2)))))
1741 return Builder.CreateBinaryIntrinsic(
1742 IntrID, Builder.CreateOr(CtOp1, CtOp2),
1743 Builder.CreateOr(ZeroUndef1, ZeroUndef2));
1744
1745 unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1746 auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1747 if (!match(I1, m_CheckedInt(LessBitWidth)))
1748 // We have a constant >= BitWidth (which can be handled by CVP)
1749 // or a non-splat vector with elements < and >= BitWidth
1750 return nullptr;
1751
1752 Type *Ty = I1->getType();
  // NOTE(review): listing line 1753 is absent from this extract; presumably
  // it starts the statement that defines `NewConst` by folding the shift
  // described in the header comment (1 << C for cttz, SignedMin >> C for
  // ctlz).
1754 IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1755 IntrID == Intrinsic::cttz
1756 ? ConstantInt::get(Ty, 1)
1757 : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1758 cast<Constant>(I1), DL);
  // The new call always passes `true` as its second operand.
1759 return Builder.CreateBinaryIntrinsic(
1760 IntrID, Builder.CreateOr(CtOp1, NewConst),
1761 ConstantInt::getTrue(ZeroUndef1->getType()));
1762}
1763
1764/// Return whether "X LOp (Y ROp Z)" is always equal to
1765/// "(X LOp Y) ROp (X LOp Z)".
// Here ROp is a min/max intrinsic ID and LOp a binary opcode; HasNUW / HasNSW
// gate the unsigned and signed cases respectively.
// NOTE(review): listing line 1766 is absent from this extract; presumably it
// holds the return type, the function name and the `LOp` / `HasNUW`
// parameters used below -- confirm against upstream.
1767 bool HasNSW, Intrinsic::ID ROp) {
1768 switch (ROp) {
1769 case Intrinsic::umax:
1770 case Intrinsic::umin:
  // Unsigned min/max: accepted for nuw add and nuw shl.
1771 if (HasNUW && LOp == Instruction::Add)
1772 return true;
1773 if (HasNUW && LOp == Instruction::Shl)
1774 return true;
1775 return false;
1776 case Intrinsic::smax:
1777 case Intrinsic::smin:
  // Signed min/max: accepted for nsw add only.
1778 return HasNSW && LOp == Instruction::Add;
1779 default:
1780 return false;
1781 }
1782}
1783
1784/// Return whether "(X ROp Y) LOp Z" is always equal to
1785/// "(X LOp Z) ROp (Y LOp Z)".
///
/// LOp is the binary opcode, ROp the min/max intrinsic ID. HasNUW/HasNSW
/// tell whether the no-unsigned-wrap / no-signed-wrap property may be
/// assumed for LOp.
///
/// NOTE(review): the first line of the signature (function name and the
/// LOp/HasNUW parameters) is not present in this listing.
1787 bool HasNSW, Intrinsic::ID ROp) {
// Commutative opcodes and shl reuse the answer of the left-distributive
// query.
1788 if (Instruction::isCommutative(LOp) || LOp == Instruction::Shl)
1789 return leftDistributesOverRight(LOp, HasNUW, HasNSW, ROp);
// Beyond that, only sub is accepted: nuw sub for unsigned min/max and
// nsw sub for signed min/max.
1790 switch (ROp) {
1791 case Intrinsic::umax:
1792 case Intrinsic::umin:
1793 return HasNUW && LOp == Instruction::Sub;
1794 case Intrinsic::smax:
1795 case Intrinsic::smin:
1796 return HasNSW && LOp == Instruction::Sub;
1797 default:
1798 return false;
1799 }
1800}
1801
1802// Attempts to factorise a common term
1803// in an instruction that has the form "(A op' B) op (C op' D)",
1804// where op is an intrinsic and op' is a binop.
//
// Both operands of the intrinsic must use the same opcode and each must have
// exactly one use. The result is "A op' (B op D)" when A == C, or
// "(A op C) op' B" when B == D, provided the matching distributivity query
// accepts the opcode/intrinsic pair. Returns the new binary operator, or
// nullptr when nothing can be factored.
//
// NOTE(review): the line carrying the function name and the declaration of
// II is not present in this listing.
1805static Value *
1807 InstCombiner::BuilderTy &Builder) {
1808 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1809 Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1810
// NOTE(review): the declarations of Op0 and Op1 are missing from this
// listing; presumably they are derived from LHS and RHS and are null when
// the operand is not a suitable binary operator -- confirm against the full
// file.
1814 if (!Op0 || !Op1)
1815 return nullptr;
1816
1817 if (Op0->getOpcode() != Op1->getOpcode())
1818 return nullptr;
1819
// Both inner operations are replaced, so they must not have other users.
1820 if (!Op0->hasOneUse() || !Op1->hasOneUse())
1821 return nullptr;
1822
1823 Instruction::BinaryOps InnerOpcode =
1824 static_cast<Instruction::BinaryOps>(Op0->getOpcode());
// A wrap flag is only usable when it is present on both inner operations.
1825 bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1826 bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1827
1828 Value *A = Op0->getOperand(0);
1829 Value *B = Op0->getOperand(1);
1830 Value *C = Op1->getOperand(0);
1831 Value *D = Op1->getOperand(1);
1832
1833 // Attempts to swap variables such that A equals C or B equals D,
1834 // if the inner operation is commutative.
1835 if (Op0->isCommutative() && A != C && B != D) {
1836 if (A == D || B == C)
1837 std::swap(C, D);
1838 else
1839 return nullptr;
1840 }
1841
1842 BinaryOperator *NewBinop;
// Common left operand: (A op' B) op (A op' D) --> A op' (B op D).
1843 if (A == C &&
1844 leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode)) {
1845 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
1846 NewBinop =
1847 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, A, NewIntrinsic));
// Common right operand: (A op' B) op (C op' B) --> (A op C) op' B.
1848 } else if (B == D && rightDistributesOverLeft(InnerOpcode, HasNUW, HasNSW,
1849 TopLevelOpcode)) {
1850 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
1851 NewBinop =
1852 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
1853 } else {
1854 return nullptr;
1855 }
1856
// Carry over the wrap flags that both original operations shared.
1857 NewBinop->setHasNoUnsignedWrap(HasNUW);
1858 NewBinop->setHasNoSignedWrap(HasNSW);
1859
1860 return NewBinop;
1861}
1862
// Rewrites a shift intrinsic whose shift-amount operand is a constant into a
// plain IR shift: shl when every amount is non-negative, otherwise a right
// shift by the negated amount (ashr for the signed intrinsic IDs tested
// below, lshr for the rest). Returns nullptr when the amount is not a
// constant, mixes negative and non-negative elements, or is out of range.
//
// NOTE(review): the signature line of this function is not present in this
// listing; II and IC are used below as the intrinsic call and the combiner.
1864 Value *Arg0 = II->getArgOperand(0);
1865 auto *ShiftConst = dyn_cast<Constant>(II->getArgOperand(1));
1866 if (!ShiftConst)
1867 return nullptr;
1868
1869 int ElemBits = Arg0->getType()->getScalarSizeInBits();
// Track whether all inspected amounts share a sign class; the two flags are
// updated by Check as a side effect.
1870 bool AllPositive = true;
1871 bool AllNegative = true;
1872
// Accepts one shift amount. Non-negative amounts (zero included) must be
// below ElemBits; negative amounts must be greater than -ElemBits. Fails as
// soon as both sign classes have been seen, or when the element is not a
// ConstantInt (including a null element).
1873 auto Check = [&](Constant *C) -> bool {
1874 if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
1875 const APInt &V = CI->getValue();
1876 if (V.isNonNegative()) {
1877 AllNegative = false;
1878 return AllPositive && V.ult(ElemBits);
1879 }
1880 AllPositive = false;
1881 return AllNegative && V.sgt(-ElemBits);
1882 }
1883 return false;
1884 };
1885
// Fixed-width vectors are validated element by element; any other type is
// validated as a single constant.
1886 if (auto *VTy = dyn_cast<FixedVectorType>(Arg0->getType())) {
1887 for (unsigned I = 0, E = VTy->getNumElements(); I < E; ++I) {
1888 if (!Check(ShiftConst->getAggregateElement(I)))
1889 return nullptr;
1890 }
1891
1892 } else if (!Check(ShiftConst))
1893 return nullptr;
1894
1895 IRBuilderBase &B = IC.Builder;
// All amounts are non-negative: an ordinary left shift.
1896 if (AllPositive)
1897 return IC.replaceInstUsesWith(*II, B.CreateShl(Arg0, ShiftConst));
1898
// All amounts are negative: shift right by the negated amount.
1899 Value *NegAmt = B.CreateNeg(ShiftConst);
1900 Intrinsic::ID IID = II->getIntrinsicID();
1901 const bool IsSigned =
1902 IID == Intrinsic::arm_neon_vshifts || IID == Intrinsic::aarch64_neon_sshl;
1903 Value *Result =
1904 IsSigned ? B.CreateAShr(Arg0, NegAmt) : B.CreateLShr(Arg0, NegAmt);
1905 return IC.replaceInstUsesWith(*II, Result);
1906}
1907
1908/// CallInst simplification. This mostly only handles folding of intrinsic
1909/// instructions. For normal calls, it allows visitCallBase to do the heavy
1910/// lifting.
1912 // Don't try to simplify calls without uses. It will not do anything useful,
1913 // but will result in the following folds being skipped.
1914 if (!CI.use_empty()) {
1915 SmallVector<Value *, 8> Args(CI.args());
1916 if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
1917 SQ.getWithInstruction(&CI)))
1918 return replaceInstUsesWith(CI, V);
1919 }
1920
1921 if (Value *FreedOp = getFreedOperand(&CI, &TLI))
1922 return visitFree(CI, FreedOp);
1923
1924 // If the caller function (i.e. us, the function that contains this CallInst)
1925 // is nounwind, mark the call as nounwind, even if the callee isn't.
1926 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1927 CI.setDoesNotThrow();
1928 return &CI;
1929 }
1930
1932 if (!II)
1933 return visitCallBase(CI);
1934
1935 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1936 // instead of in visitCallBase.
1937 if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1938 if (auto NumBytes = MI->getLengthInBytes()) {
1939 // memmove/cpy/set of zero bytes is a noop.
1940 if (NumBytes->isZero())
1941 return eraseInstFromFunction(CI);
1942
1943 // For atomic unordered mem intrinsics if len is not a positive or
1944 // not a multiple of element size then behavior is undefined.
1945 if (MI->isAtomic() &&
1946 (NumBytes->isNegative() ||
1947 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1949 assert(MI->getType()->isVoidTy() &&
1950 "non void atomic unordered mem intrinsic");
1951 return eraseInstFromFunction(*MI);
1952 }
1953 }
1954
1955 // No other transformations apply to volatile transfers.
1956 if (MI->isVolatile())
1957 return nullptr;
1958
1960 // memmove(x,x,size) -> noop.
1961 if (MTI->getSource() == MTI->getDest())
1962 return eraseInstFromFunction(CI);
1963 }
1964
1965 auto IsPointerUndefined = [MI](Value *Ptr) {
1966 return isa<ConstantPointerNull>(Ptr) &&
1968 MI->getFunction(),
1969 cast<PointerType>(Ptr->getType())->getAddressSpace());
1970 };
1971 bool SrcIsUndefined = false;
1972 // If we can determine a pointer alignment that is bigger than currently
1973 // set, update the alignment.
1974 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1976 return I;
1977 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1978 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1979 if (Instruction *I = SimplifyAnyMemSet(MSI))
1980 return I;
1981 }
1982
1983 // If src/dest is null, this memory intrinsic must be a noop.
1984 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1985 Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
1986 return eraseInstFromFunction(CI);
1987 }
1988
1989 // If we have a memmove and the source operation is a constant global,
1990 // then the source and dest pointers can't alias, so we can change this
1991 // into a call to memcpy.
1992 if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1993 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1994 if (GVSrc->isConstant()) {
1995 Module *M = CI.getModule();
1996 Intrinsic::ID MemCpyID =
1997 MMI->isAtomic()
1998 ? Intrinsic::memcpy_element_unordered_atomic
1999 : Intrinsic::memcpy;
2000 Type *Tys[3] = { CI.getArgOperand(0)->getType(),
2001 CI.getArgOperand(1)->getType(),
2002 CI.getArgOperand(2)->getType() };
2004 Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
2005 return II;
2006 }
2007 }
2008 }
2009
2010 // For fixed width vector result intrinsics, use the generic demanded vector
2011 // support.
2012 if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
2013 auto VWidth = IIFVTy->getNumElements();
2014 APInt PoisonElts(VWidth, 0);
2015 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2016 if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
2017 if (V != II)
2018 return replaceInstUsesWith(*II, V);
2019 return II;
2020 }
2021 }
2022
2023 if (II->isCommutative()) {
2024 if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) {
2025 replaceOperand(*II, 0, Pair->first);
2026 replaceOperand(*II, 1, Pair->second);
2027 return II;
2028 }
2029
2030 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
2031 return NewCall;
2032 }
2033
2034 // Unused constrained FP intrinsic calls may have declared side effect, which
2035 // prevents it from being removed. In some cases however the side effect is
2036 // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
2037 // returns a replacement, the call may be removed.
2038 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
2039 if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
2040 return eraseInstFromFunction(CI);
2041 }
2042
2043 Intrinsic::ID IID = II->getIntrinsicID();
2044 switch (IID) {
2045 case Intrinsic::objectsize: {
2046 SmallVector<Instruction *> InsertedInstructions;
2047 if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
2048 &InsertedInstructions)) {
2049 for (Instruction *Inserted : InsertedInstructions)
2050 Worklist.add(Inserted);
2051 return replaceInstUsesWith(CI, V);
2052 }
2053 return nullptr;
2054 }
2055 case Intrinsic::abs: {
2056 Value *IIOperand = II->getArgOperand(0);
2057 bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
2058
2059 // abs(-x) -> abs(x)
2060 Value *X;
2061 if (match(IIOperand, m_Neg(m_Value(X)))) {
2062 if (cast<Instruction>(IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
2063 replaceOperand(*II, 1, Builder.getTrue());
2064 return replaceOperand(*II, 0, X);
2065 }
2066 if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
2067 return replaceOperand(*II, 0, X);
2068
2069 Value *Y;
2070 // abs(a * abs(b)) -> abs(a * b)
2071 if (match(IIOperand,
2074 bool NSW =
2075 cast<Instruction>(IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
2076 auto *XY = NSW ? Builder.CreateNSWMul(X, Y) : Builder.CreateMul(X, Y);
2077 return replaceOperand(*II, 0, XY);
2078 }
2079
2080 if (std::optional<bool> Known =
2081 getKnownSignOrZero(IIOperand, SQ.getWithInstruction(II))) {
2082 // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
2083 // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
2084 if (!*Known)
2085 return replaceInstUsesWith(*II, IIOperand);
2086
2087 // abs(x) -> -x if x < 0
2088 // abs(x) -> -x if x <= 0 (include abs(x-y) --> y - x where x <= y)
2089 if (IntMinIsPoison)
2090 return BinaryOperator::CreateNSWNeg(IIOperand);
2091 return BinaryOperator::CreateNeg(IIOperand);
2092 }
2093
2094 // abs (sext X) --> zext (abs X*)
2095 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
2096 if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
2097 Value *NarrowAbs =
2098 Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
2099 return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
2100 }
2101
2102 // Match a complicated way to check if a number is odd/even:
2103 // abs (srem X, 2) --> and X, 1
2104 const APInt *C;
2105 if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
2106 return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
2107
2108 break;
2109 }
2110 case Intrinsic::umin: {
2111 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2112 // umin(x, 1) == zext(x != 0)
2113 if (match(I1, m_One())) {
2114 assert(II->getType()->getScalarSizeInBits() != 1 &&
2115 "Expected simplify of umin with max constant");
2116 Value *Zero = Constant::getNullValue(I0->getType());
2117 Value *Cmp = Builder.CreateICmpNE(I0, Zero);
2118 return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
2119 }
2120 // umin(cttz(x), const) --> cttz(x | (1 << const))
2121 if (Value *FoldedCttz =
2123 I0, I1, DL, Builder))
2124 return replaceInstUsesWith(*II, FoldedCttz);
2125 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
2126 if (Value *FoldedCtlz =
2128 I0, I1, DL, Builder))
2129 return replaceInstUsesWith(*II, FoldedCtlz);
2130 [[fallthrough]];
2131 }
2132 case Intrinsic::umax: {
2133 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2134 Value *X, *Y;
2135 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
2136 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2137 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2138 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2139 }
2140 Constant *C;
2141 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2142 I0->hasOneUse()) {
2143 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType(), DL)) {
2144 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2145 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2146 }
2147 }
2148 // If C is not 0:
2149 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
2150 // If C is not 0 or 1:
2151 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
2152 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
2153 const APInt *C;
2154 Value *X;
2155 if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
2156 !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
2157 return nullptr;
2158 if (C->isZero())
2159 return nullptr;
2160 if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
2161 return nullptr;
2162
2163 Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
2164 Value *NewSelect = nullptr;
2165 NewSelect = Builder.CreateSelectWithUnknownProfile(
2166 Cmp, ConstantInt::get(X->getType(), 1), A, DEBUG_TYPE);
2167 return replaceInstUsesWith(*II, NewSelect);
2168 };
2169
2170 if (IID == Intrinsic::umax) {
2171 if (Instruction *I = foldMaxMulShift(I0, I1))
2172 return I;
2173 if (Instruction *I = foldMaxMulShift(I1, I0))
2174 return I;
2175 }
2176
2177 // If both operands of unsigned min/max are sign-extended, it is still ok
2178 // to narrow the operation.
2179 [[fallthrough]];
2180 }
2181 case Intrinsic::smax:
2182 case Intrinsic::smin: {
2183 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2184 Value *X, *Y;
2185 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
2186 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2187 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2188 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2189 }
2190
2191 Constant *C;
2192 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2193 I0->hasOneUse()) {
2194 if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType(), DL)) {
2195 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2196 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2197 }
2198 }
2199
2200 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2201 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
2202 const APInt *MinC, *MaxC;
2203 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2204 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2205 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2206 Value *NewMax = Builder.CreateBinaryIntrinsic(
2207 MaxIID, X, ConstantInt::get(X->getType(), *MaxC));
2208 return replaceInstUsesWith(
2209 *II, Builder.CreateBinaryIntrinsic(
2210 MinIID, NewMax, ConstantInt::get(X->getType(), *MinC)));
2211 };
2212 if (IID == Intrinsic::smax &&
2214 m_APInt(MinC)))) &&
2215 match(I1, m_APInt(MaxC)) && MinC->sgt(*MaxC))
2216 return CreateCanonicalClampForm(true);
2217 if (IID == Intrinsic::umax &&
2219 m_APInt(MinC)))) &&
2220 match(I1, m_APInt(MaxC)) && MinC->ugt(*MaxC))
2221 return CreateCanonicalClampForm(false);
2222
2223 // umin(i1 X, i1 Y) -> and i1 X, Y
2224 // smax(i1 X, i1 Y) -> and i1 X, Y
2225 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2226 II->getType()->isIntOrIntVectorTy(1)) {
2227 return BinaryOperator::CreateAnd(I0, I1);
2228 }
2229
2230 // umax(i1 X, i1 Y) -> or i1 X, Y
2231 // smin(i1 X, i1 Y) -> or i1 X, Y
2232 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2233 II->getType()->isIntOrIntVectorTy(1)) {
2234 return BinaryOperator::CreateOr(I0, I1);
2235 }
2236
2237 // smin(smax(X, -1), 1) -> scmp(X, 0)
2238 // smax(smin(X, 1), -1) -> scmp(X, 0)
2239 // At this point, smax(smin(X, 1), -1) is changed to smin(smax(X, -1)
2240 // And i1's have been changed to and/ors
2241 // So we only need to check for smin
2242 if (IID == Intrinsic::smin) {
2243 if (match(I0, m_OneUse(m_SMax(m_Value(X), m_AllOnes()))) &&
2244 match(I1, m_One())) {
2245 Value *Zero = ConstantInt::get(X->getType(), 0);
2246 return replaceInstUsesWith(
2247 CI,
2248 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {X, Zero}));
2249 }
2250 }
2251
2252 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2253 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2254 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2255 // TODO: Canonicalize neg after min/max if I1 is constant.
2256 if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
2257 (I0->hasOneUse() || I1->hasOneUse())) {
2259 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
2260 return BinaryOperator::CreateNSWNeg(InvMaxMin);
2261 }
2262 }
2263
2264 // (umax X, (xor X, Pow2))
2265 // -> (or X, Pow2)
2266 // (umin X, (xor X, Pow2))
2267 // -> (and X, ~Pow2)
2268 // (smax X, (xor X, Pos_Pow2))
2269 // -> (or X, Pos_Pow2)
2270 // (smin X, (xor X, Pos_Pow2))
2271 // -> (and X, ~Pos_Pow2)
2272 // (smax X, (xor X, Neg_Pow2))
2273 // -> (and X, ~Neg_Pow2)
2274 // (smin X, (xor X, Neg_Pow2))
2275 // -> (or X, Neg_Pow2)
2276 if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
2277 match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
2278 isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
2279 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2280 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2281
2282 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2283 auto KnownSign = getKnownSign(X, SQ.getWithInstruction(II));
2284 if (KnownSign == std::nullopt) {
2285 UseOr = false;
2286 UseAndN = false;
2287 } else if (*KnownSign /* true is Signed. */) {
2288 UseOr ^= true;
2289 UseAndN ^= true;
2290 Type *Ty = I0->getType();
2291 // Negative power of 2 must be IntMin. It's possible to be able to
2292 // prove negative / power of 2 without actually having known bits, so
2293 // just get the value by hand.
2295 Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
2296 }
2297 }
2298 if (UseOr)
2299 return BinaryOperator::CreateOr(I0, X);
2300 else if (UseAndN)
2301 return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
2302 }
2303
2304 // If we can eliminate ~A and Y is free to invert:
2305 // max ~A, Y --> ~(min A, ~Y)
2306 //
2307 // Examples:
2308 // max ~A, ~Y --> ~(min A, Y)
2309 // max ~A, C --> ~(min A, ~C)
2310 // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
2311 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2312 Value *A;
2313 if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
2314 !isFreeToInvert(A, A->hasOneUse())) {
2315 if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
2317 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
2318 return BinaryOperator::CreateNot(InvMaxMin);
2319 }
2320 }
2321 return nullptr;
2322 };
2323
2324 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2325 return I;
2326 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2327 return I;
2328
2330 return I;
2331
2332 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
2333 const APInt *RHSC;
2334 if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
2335 match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
2336 return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
2337 ConstantInt::get(II->getType(), *RHSC));
2338
2339 // smax(X, -X) --> abs(X)
2340 // smin(X, -X) --> -abs(X)
2341 // umax(X, -X) --> -abs(X)
2342 // umin(X, -X) --> abs(X)
2343 if (isKnownNegation(I0, I1)) {
2344 // We can choose either operand as the input to abs(), but if we can
2345 // eliminate the only use of a value, that's better for subsequent
2346 // transforms/analysis.
2347 if (I0->hasOneUse() && !I1->hasOneUse())
2348 std::swap(I0, I1);
2349
2350 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2351 // operation and potentially its negation.
2352 bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
2353 Value *Abs = Builder.CreateBinaryIntrinsic(
2354 Intrinsic::abs, I0,
2355 ConstantInt::getBool(II->getContext(), IntMinIsPoison));
2356
2357 // We don't have a "nabs" intrinsic, so negate if needed based on the
2358 // max/min operation.
2359 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2360 Abs = Builder.CreateNeg(Abs, "nabs", IntMinIsPoison);
2361 return replaceInstUsesWith(CI, Abs);
2362 }
2363
2365 return Sel;
2366
2367 if (Instruction *SAdd = matchSAddSubSat(*II))
2368 return SAdd;
2369
2370 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2371 return replaceInstUsesWith(*II, NewMinMax);
2372
2374 return R;
2375
2376 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2377 return NewMinMax;
2378
2379 // Try to fold minmax with constant RHS based on range information
2380 if (match(I1, m_APIntAllowPoison(RHSC))) {
2381 ICmpInst::Predicate Pred =
2383 bool IsSigned = MinMaxIntrinsic::isSigned(IID);
2385 I0, IsSigned, SQ.getWithInstruction(II));
2386 if (!LHS_CR.isFullSet()) {
2387 if (LHS_CR.icmp(Pred, *RHSC))
2388 return replaceInstUsesWith(*II, I0);
2389 if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
2390 return replaceInstUsesWith(*II,
2391 ConstantInt::get(II->getType(), *RHSC));
2392 }
2393 }
2394
2396 return replaceInstUsesWith(*II, V);
2397
2398 break;
2399 }
2400 case Intrinsic::scmp: {
2401 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2402 Value *LHS, *RHS;
2403 if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
2404 return replaceInstUsesWith(
2405 CI,
2406 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
2407 break;
2408 }
2409 case Intrinsic::bitreverse: {
2410 Value *IIOperand = II->getArgOperand(0);
2411 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2412 Value *X;
2413 if (match(IIOperand, m_ZExt(m_Value(X))) &&
2414 X->getType()->isIntOrIntVectorTy(1)) {
2415 Type *Ty = II->getType();
2416 APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
2417 return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
2419 }
2420
2421 if (Instruction *crossLogicOpFold =
2423 return crossLogicOpFold;
2424
2425 break;
2426 }
2427 case Intrinsic::bswap: {
2428 Value *IIOperand = II->getArgOperand(0);
2429
2430 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2431 // inverse-shift-of-bswap:
2432 // bswap (shl X, Y) --> lshr (bswap X), Y
2433 // bswap (lshr X, Y) --> shl (bswap X), Y
2434 Value *X, *Y;
2435 if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
2436 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2438 Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
2439 BinaryOperator::BinaryOps InverseShift =
2440 cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
2441 ? Instruction::LShr
2442 : Instruction::Shl;
2443 return BinaryOperator::Create(InverseShift, NewSwap, Y);
2444 }
2445 }
2446
2447 KnownBits Known = computeKnownBits(IIOperand, II);
2448 uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
2449 uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
2450 unsigned BW = Known.getBitWidth();
2451
2452 // bswap(x) -> shift(x) if x has exactly one "active byte"
2453 if (BW - LZ - TZ == 8) {
2454 assert(LZ != TZ && "active byte cannot be in the middle");
2455 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2456 return BinaryOperator::CreateNUWShl(
2457 IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
2458 // -> lshr(x) if the "active byte" is in the high part of x
2459 return BinaryOperator::CreateExactLShr(
2460 IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
2461 }
2462
2463 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2464 if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
2465 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2466 Value *CV = ConstantInt::get(X->getType(), C);
2467 Value *V = Builder.CreateLShr(X, CV);
2468 return new TruncInst(V, IIOperand->getType());
2469 }
2470
2471 if (Instruction *crossLogicOpFold =
2473 return crossLogicOpFold;
2474 }
2475
2476 // Try to fold into bitreverse if bswap is the root of the expression tree.
2477 if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
2478 /*MatchBitReversals*/ true))
2479 return BitOp;
2480 break;
2481 }
2482 case Intrinsic::masked_load:
2483 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
2484 return replaceInstUsesWith(CI, SimplifiedMaskedOp);
2485 break;
2486 case Intrinsic::masked_store:
2487 return simplifyMaskedStore(*II);
2488 case Intrinsic::masked_gather:
2489 return simplifyMaskedGather(*II);
2490 case Intrinsic::masked_scatter:
2491 return simplifyMaskedScatter(*II);
2492 case Intrinsic::launder_invariant_group:
2493 case Intrinsic::strip_invariant_group:
2494 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
2495 return replaceInstUsesWith(*II, SkippedBarrier);
2496 break;
2497 case Intrinsic::powi: {
2498 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2499 // 0 and 1 are handled in instsimplify
2500 // powi(x, -1) -> 1/x
2501 if (Power->isMinusOne())
2502 return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
2503 II->getArgOperand(0), II);
2504 // powi(x, 2) -> x*x
2505 if (Power->equalsInt(2))
2506 return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
2507 II->getArgOperand(0), II);
2508
2509 if (!Power->getValue()[0]) {
2510 Value *X;
2511 // If power is even:
2512 // powi(-x, p) -> powi(x, p)
2513 // powi(fabs(x), p) -> powi(x, p)
2514 // powi(copysign(x, y), p) -> powi(x, p)
2515 if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
2516 match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
2517 match(II->getArgOperand(0),
2519 return replaceOperand(*II, 0, X);
2520 }
2521 }
2522 if (ConstantFP *Base = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
2523 Value *Exp = II->getArgOperand(1);
2524 Type *Ty = Base->getType();
2525 // powi(2.0, p) -> ldexp(1.0, p)
2526 if (II->hasApproxFunc() && Base->isExactlyValue(2.0)) {
2527 ConstantFP *One = ConstantFP::get(Ty, 1.0);
2528 if (auto *VTy = dyn_cast<VectorType>(Ty))
2529 Exp = Builder.CreateVectorSplat(VTy->getElementCount(), Exp);
2530 Value *Ldexp = Builder.CreateLdexp(One, Exp, II);
2531 return replaceInstUsesWith(*II, Ldexp);
2532 }
2533 }
2534 break;
2535 }
2536
2537 case Intrinsic::cttz:
2538 case Intrinsic::ctlz:
2539 if (auto *I = foldCttzCtlz(*II, *this))
2540 return I;
2541 break;
2542
2543 case Intrinsic::ctpop:
2544 if (auto *I = foldCtpop(*II, *this))
2545 return I;
2546 break;
2547
2548 case Intrinsic::fshl:
2549 case Intrinsic::fshr: {
2550 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
2551 Type *Ty = II->getType();
2552 unsigned BitWidth = Ty->getScalarSizeInBits();
2553 Constant *ShAmtC;
2554 if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
2555 // Canonicalize a shift amount constant operand to modulo the bit-width.
2556 Constant *WidthC = ConstantInt::get(Ty, BitWidth);
2557 Constant *ModuloC =
2558 ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
2559 if (!ModuloC)
2560 return nullptr;
2561 if (ModuloC != ShAmtC)
2562 return replaceOperand(*II, 2, ModuloC);
2563
2565 ShAmtC, DL),
2566 m_One()) &&
2567 "Shift amount expected to be modulo bitwidth");
2568
2569 // Canonicalize funnel shift right by constant to funnel shift left. This
2570 // is not entirely arbitrary. For historical reasons, the backend may
2571 // recognize rotate left patterns but miss rotate right patterns.
2572 if (IID == Intrinsic::fshr) {
2573 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2574 if (!isKnownNonZero(ShAmtC, SQ.getWithInstruction(II)))
2575 return nullptr;
2576
2577 Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
2578 Module *Mod = II->getModule();
2579 Function *Fshl =
2580 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
2581 return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
2582 }
2583 assert(IID == Intrinsic::fshl &&
2584 "All funnel shifts by simple constants should go left");
2585
2586 // fshl(X, 0, C) --> shl X, C
2587 // fshl(X, undef, C) --> shl X, C
2588 if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
2589 return BinaryOperator::CreateShl(Op0, ShAmtC);
2590
2591 // fshl(0, X, C) --> lshr X, (BW-C)
2592 // fshl(undef, X, C) --> lshr X, (BW-C)
2593 if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
2594 return BinaryOperator::CreateLShr(Op1,
2595 ConstantExpr::getSub(WidthC, ShAmtC));
2596
2597 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2598 if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
2599 Module *Mod = II->getModule();
2600 Function *Bswap =
2601 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
2602 return CallInst::Create(Bswap, { Op0 });
2603 }
2604 if (Instruction *BitOp =
2605 matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
2606 /*MatchBitReversals*/ true))
2607 return BitOp;
2608
2609 // R = fshl(X, X, C2)
2610 // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
2611 Value *InnerOp;
2612 const APInt *ShAmtInnerC, *ShAmtOuterC;
2613 if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
2614 m_APInt(ShAmtInnerC))) &&
2615 match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) {
2616 APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
2617 APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
2618 if (Modulo.isZero())
2619 return replaceInstUsesWith(*II, InnerOp);
2620 Constant *ModuloC = ConstantInt::get(Ty, Modulo);
2622 {InnerOp, InnerOp, ModuloC});
2623 }
2624 }
2625
2626 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2627 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2628 // if BitWidth is a power-of-2
2629 Value *Y;
2630 if (Op0 == Op1 && isPowerOf2_32(BitWidth) &&
2631 match(II->getArgOperand(2), m_Neg(m_Value(Y)))) {
2632 Module *Mod = II->getModule();
2634 Mod, IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Ty);
2635 return CallInst::Create(OppositeShift, {Op0, Op1, Y});
2636 }
2637
2638 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2639 // power-of-2
2640 if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
2641 match(Op1, m_ZeroInt())) {
2642 Value *Op2 = II->getArgOperand(2);
2643 Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
2644 return BinaryOperator::CreateShl(Op0, And);
2645 }
2646
2647 // Left or right might be masked.
2649 return &CI;
2650
2651 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2652 // so only the low bits of the shift amount are demanded if the bitwidth is
2653 // a power-of-2.
2654 if (!isPowerOf2_32(BitWidth))
2655 break;
2657 KnownBits Op2Known(BitWidth);
2658 if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
2659 return &CI;
2660 break;
2661 }
2662 case Intrinsic::ptrmask: {
2663 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2664 KnownBits Known(BitWidth);
2666 return II;
2667
2668 Value *InnerPtr, *InnerMask;
2669 bool Changed = false;
2670 // Combine:
2671 // (ptrmask (ptrmask p, A), B)
2672 // -> (ptrmask p, (and A, B))
2673 if (match(II->getArgOperand(0),
2675 m_Value(InnerMask))))) {
2676 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2677 "Mask types must match");
2678 // TODO: If InnerMask == Op1, we could copy attributes from inner
2679 // callsite -> outer callsite.
2680 Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
2681 replaceOperand(CI, 0, InnerPtr);
2682 replaceOperand(CI, 1, NewMask);
2683 Changed = true;
2684 }
2685
2686 // See if we can deduce non-null.
2687 if (!CI.hasRetAttr(Attribute::NonNull) &&
2688 (Known.isNonZero() ||
2689 isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
2690 CI.addRetAttr(Attribute::NonNull);
2691 Changed = true;
2692 }
2693
2694 unsigned NewAlignmentLog =
2696 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
2697 // Known bits will capture if we had alignment information associated with
2698 // the pointer argument.
2699 if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
2701 CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
2702 Changed = true;
2703 }
2704 if (Changed)
2705 return &CI;
2706 break;
2707 }
2708 case Intrinsic::uadd_with_overflow:
2709 case Intrinsic::sadd_with_overflow: {
2710 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2711 return I;
2712
2713 // Given 2 constant operands whose sum does not overflow:
2714 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2715 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2716 Value *X;
2717 const APInt *C0, *C1;
2718 Value *Arg0 = II->getArgOperand(0);
2719 Value *Arg1 = II->getArgOperand(1);
2720 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2721 bool HasNWAdd = IsSigned
2722 ? match(Arg0, m_NSWAddLike(m_Value(X), m_APInt(C0)))
2723 : match(Arg0, m_NUWAddLike(m_Value(X), m_APInt(C0)));
2724 if (HasNWAdd && match(Arg1, m_APInt(C1))) {
2725 bool Overflow;
2726 APInt NewC =
2727 IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
2728 if (!Overflow)
2729 return replaceInstUsesWith(
2730 *II, Builder.CreateBinaryIntrinsic(
2731 IID, X, ConstantInt::get(Arg1->getType(), NewC)));
2732 }
2733 break;
2734 }
2735
2736 case Intrinsic::umul_with_overflow:
2737 case Intrinsic::smul_with_overflow:
2738 case Intrinsic::usub_with_overflow:
2739 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2740 return I;
2741 break;
2742
2743 case Intrinsic::ssub_with_overflow: {
2744 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2745 return I;
2746
2747 Constant *C;
2748 Value *Arg0 = II->getArgOperand(0);
2749 Value *Arg1 = II->getArgOperand(1);
2750 // Given a constant C that is not the minimum signed value
2751 // for an integer of a given bit width:
2752 //
2753 // ssubo X, C -> saddo X, -C
2754 if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
2755 Value *NegVal = ConstantExpr::getNeg(C);
2756 // Build a saddo call that is equivalent to the discovered
2757 // ssubo call.
2758 return replaceInstUsesWith(
2759 *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2760 Arg0, NegVal));
2761 }
2762
2763 break;
2764 }
2765
2766 case Intrinsic::uadd_sat:
2767 case Intrinsic::sadd_sat:
2768 case Intrinsic::usub_sat:
2769 case Intrinsic::ssub_sat: {
2771 Type *Ty = SI->getType();
2772 Value *Arg0 = SI->getLHS();
2773 Value *Arg1 = SI->getRHS();
2774
2775 // Make use of known overflow information.
2776 OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
2777 Arg0, Arg1, SI);
2778 switch (OR) {
2780 break;
2782 if (SI->isSigned())
2783 return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
2784 else
2785 return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
2787 unsigned BitWidth = Ty->getScalarSizeInBits();
2788 APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
2789 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
2790 }
2792 unsigned BitWidth = Ty->getScalarSizeInBits();
2793 APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
2794 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
2795 }
2796 }
2797
2798 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2799 // which after that:
2800 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2801 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
2802 Constant *C, *C1;
2803 Value *A;
2804 if (IID == Intrinsic::usub_sat &&
2805 match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
2806 match(Arg1, m_ImmConstant(C1))) {
2807 auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
2808 auto *NewSub =
2809 Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
2810 return replaceInstUsesWith(*SI, NewSub);
2811 }
2812
2813 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2814 if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2815 C->isNotMinSignedValue()) {
2816 Value *NegVal = ConstantExpr::getNeg(C);
2817 return replaceInstUsesWith(
2818 *II, Builder.CreateBinaryIntrinsic(
2819 Intrinsic::sadd_sat, Arg0, NegVal));
2820 }
2821
2822 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2823 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2824 // if Val and Val2 have the same sign
2825 if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
2826 Value *X;
2827 const APInt *Val, *Val2;
2828 APInt NewVal;
2829 bool IsUnsigned =
2830 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2831 if (Other->getIntrinsicID() == IID &&
2832 match(Arg1, m_APInt(Val)) &&
2833 match(Other->getArgOperand(0), m_Value(X)) &&
2834 match(Other->getArgOperand(1), m_APInt(Val2))) {
2835 if (IsUnsigned)
2836 NewVal = Val->uadd_sat(*Val2);
2837 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2838 bool Overflow;
2839 NewVal = Val->sadd_ov(*Val2, Overflow);
2840 if (Overflow) {
2841 // Both adds together may add more than SignedMaxValue
2842 // without saturating the final result.
2843 break;
2844 }
2845 } else {
2846 // Cannot fold saturated addition with different signs.
2847 break;
2848 }
2849
2850 return replaceInstUsesWith(
2851 *II, Builder.CreateBinaryIntrinsic(
2852 IID, X, ConstantInt::get(II->getType(), NewVal)));
2853 }
2854 }
2855 break;
2856 }
2857
2858 case Intrinsic::minnum:
2859 case Intrinsic::maxnum:
2860 case Intrinsic::minimumnum:
2861 case Intrinsic::maximumnum:
2862 case Intrinsic::minimum:
2863 case Intrinsic::maximum: {
2864 Value *Arg0 = II->getArgOperand(0);
2865 Value *Arg1 = II->getArgOperand(1);
2866 Value *X, *Y;
2867 if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2868 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2869 // If both operands are negated, invert the call and negate the result:
2870 // min(-X, -Y) --> -(max(X, Y))
2871 // max(-X, -Y) --> -(min(X, Y))
2872 Intrinsic::ID NewIID;
2873 switch (IID) {
2874 case Intrinsic::maxnum:
2875 NewIID = Intrinsic::minnum;
2876 break;
2877 case Intrinsic::minnum:
2878 NewIID = Intrinsic::maxnum;
2879 break;
2880 case Intrinsic::maximumnum:
2881 NewIID = Intrinsic::minimumnum;
2882 break;
2883 case Intrinsic::minimumnum:
2884 NewIID = Intrinsic::maximumnum;
2885 break;
2886 case Intrinsic::maximum:
2887 NewIID = Intrinsic::minimum;
2888 break;
2889 case Intrinsic::minimum:
2890 NewIID = Intrinsic::maximum;
2891 break;
2892 default:
2893 llvm_unreachable("unexpected intrinsic ID");
2894 }
2895 Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
2896 Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
2897 FNeg->copyIRFlags(II);
2898 return FNeg;
2899 }
2900
2901 // m(m(X, C2), C1) -> m(X, C)
2902 const APFloat *C1, *C2;
2903 if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
2904 if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
2905 ((match(M->getArgOperand(0), m_Value(X)) &&
2906 match(M->getArgOperand(1), m_APFloat(C2))) ||
2907 (match(M->getArgOperand(1), m_Value(X)) &&
2908 match(M->getArgOperand(0), m_APFloat(C2))))) {
2909 APFloat Res(0.0);
2910 switch (IID) {
2911 case Intrinsic::maxnum:
2912 Res = maxnum(*C1, *C2);
2913 break;
2914 case Intrinsic::minnum:
2915 Res = minnum(*C1, *C2);
2916 break;
2917 case Intrinsic::maximumnum:
2918 Res = maximumnum(*C1, *C2);
2919 break;
2920 case Intrinsic::minimumnum:
2921 Res = minimumnum(*C1, *C2);
2922 break;
2923 case Intrinsic::maximum:
2924 Res = maximum(*C1, *C2);
2925 break;
2926 case Intrinsic::minimum:
2927 Res = minimum(*C1, *C2);
2928 break;
2929 default:
2930 llvm_unreachable("unexpected intrinsic ID");
2931 }
2932 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2933 // was a simplification (so Arg0 and its original flags could
2934 // propagate?)
2935 Value *V = Builder.CreateBinaryIntrinsic(
2936 IID, X, ConstantFP::get(Arg0->getType(), Res),
2938 return replaceInstUsesWith(*II, V);
2939 }
2940 }
2941
2942 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2943 if (match(Arg0, m_FPExt(m_Value(X))) && match(Arg1, m_FPExt(m_Value(Y))) &&
2944 (Arg0->hasOneUse() || Arg1->hasOneUse()) &&
2945 X->getType() == Y->getType()) {
2946 Value *NewCall =
2947 Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
2948 return new FPExtInst(NewCall, II->getType());
2949 }
2950
2951 // m(fpext X, C) -> fpext m(X, TruncC) if C can be losslessly truncated.
2952 Constant *C;
2953 if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
2954 match(Arg1, m_ImmConstant(C))) {
2955 if (Constant *TruncC =
2956 getLosslessInvCast(C, X->getType(), Instruction::FPExt, DL)) {
2957 Value *NewCall =
2958 Builder.CreateBinaryIntrinsic(IID, X, TruncC, II, II->getName());
2959 return new FPExtInst(NewCall, II->getType());
2960 }
2961 }
2962
2963 // max X, -X --> fabs X
2964 // min X, -X --> -(fabs X)
2965 // TODO: Remove one-use limitation? That is obviously better for max,
2966 // hence why we don't check for one-use for that. However,
2967 // it would be an extra instruction for min (fnabs), but
2968 // that is still likely better for analysis and codegen.
// Returns true when Op0 is `fneg X` and Op1 is that same X, i.e. the pair has
// the shape (-X, X). X is captured by reference and is bound by the fneg
// match, so the code after a successful check can read it.
// For the min-style IDs (minimum, minnum, minimumnum) the fneg must also have
// exactly one use; for every other ID the use count is not checked.
2969 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
2970 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Specific(X)))
2971 return Op0->hasOneUse() ||
2972 (IID != Intrinsic::minimum && IID != Intrinsic::minnum &&
2973 IID != Intrinsic::minimumnum);
// Op0 is not an fneg, or it negates something other than Op1.
2974 return false;
2975 };
2976
2977 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
2978 Value *R = Builder.CreateFAbs(X, II);
2979 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
2980 IID == Intrinsic::minimumnum)
2981 R = Builder.CreateFNegFMF(R, II);
2982 return replaceInstUsesWith(*II, R);
2983 }
2984
2985 break;
2986 }
2987 case Intrinsic::matrix_multiply: {
2988 // Optimize negation in matrix multiplication.
2989
2990 // -A * -B -> A * B
2991 Value *A, *B;
2992 if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
2993 match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
2994 replaceOperand(*II, 0, A);
2995 replaceOperand(*II, 1, B);
2996 return II;
2997 }
2998
2999 Value *Op0 = II->getOperand(0);
3000 Value *Op1 = II->getOperand(1);
3001 Value *OpNotNeg, *NegatedOp;
3002 unsigned NegatedOpArg, OtherOpArg;
3003 if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
3004 NegatedOp = Op0;
3005 NegatedOpArg = 0;
3006 OtherOpArg = 1;
3007 } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
3008 NegatedOp = Op1;
3009 NegatedOpArg = 1;
3010 OtherOpArg = 0;
3011 } else
3012 // Multiplication doesn't have a negated operand.
3013 break;
3014
3015 // Only optimize if the negated operand has only one use.
3016 if (!NegatedOp->hasOneUse())
3017 break;
3018
3019 Value *OtherOp = II->getOperand(OtherOpArg);
3020 VectorType *RetTy = cast<VectorType>(II->getType());
3021 VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
3022 VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
3023 ElementCount NegatedCount = NegatedOpTy->getElementCount();
3024 ElementCount OtherCount = OtherOpTy->getElementCount();
3025 ElementCount RetCount = RetTy->getElementCount();
3026 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
3027 if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
3028 ElementCount::isKnownLT(OtherCount, RetCount)) {
3029 Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
3030 replaceOperand(*II, NegatedOpArg, OpNotNeg);
3031 replaceOperand(*II, OtherOpArg, InverseOtherOp);
3032 return II;
3033 }
3034 // (-A) * B -> -(A * B), if it is cheaper to negate the result
3035 if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
3036 SmallVector<Value *, 5> NewArgs(II->args());
3037 NewArgs[NegatedOpArg] = OpNotNeg;
3038 Instruction *NewMul =
3039 Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
3040 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
3041 }
3042 break;
3043 }
3044 case Intrinsic::fmuladd: {
3045 // Try to simplify the underlying FMul.
3046 if (Value *V =
3047 simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
3048 II->getFastMathFlags(), SQ.getWithInstruction(II)))
3049 return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
3050 II->getFastMathFlags());
3051
3052 [[fallthrough]];
3053 }
3054 case Intrinsic::fma: {
3055 // fma fneg(x), fneg(y), z -> fma x, y, z
3056 Value *Src0 = II->getArgOperand(0);
3057 Value *Src1 = II->getArgOperand(1);
3058 Value *Src2 = II->getArgOperand(2);
3059 Value *X, *Y;
3060 if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
3061 replaceOperand(*II, 0, X);
3062 replaceOperand(*II, 1, Y);
3063 return II;
3064 }
3065
3066 // fma fabs(x), fabs(x), z -> fma x, x, z
3067 if (match(Src0, m_FAbs(m_Value(X))) &&
3068 match(Src1, m_FAbs(m_Specific(X)))) {
3069 replaceOperand(*II, 0, X);
3070 replaceOperand(*II, 1, X);
3071 return II;
3072 }
3073
3074 // Try to simplify the underlying FMul. We can only apply simplifications
3075 // that do not require rounding.
3076 if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
3077 SQ.getWithInstruction(II)))
3078 return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
3079
3080 // fma x, y, 0 -> fmul x, y
3081 // This is always valid for -0.0, but requires nsz for +0.0 as
3082 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
3083 if (match(Src2, m_NegZeroFP()) ||
3084 (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
3085 return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
3086
3087 // fma x, -1.0, y -> fsub y, x
3088 if (match(Src1, m_SpecificFP(-1.0)))
3089 return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
3090
3091 break;
3092 }
3093 case Intrinsic::copysign: {
3094 Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
3095 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
3096 Sign, getSimplifyQuery().getWithInstruction(II))) {
3097 if (*KnownSignBit) {
3098 // If we know that the sign argument is negative, reduce to FNABS:
3099 // copysign Mag, -Sign --> fneg (fabs Mag)
3100 Value *Fabs = Builder.CreateFAbs(Mag, II);
3101 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
3102 }
3103
3104 // If we know that the sign argument is positive, reduce to FABS:
3105 // copysign Mag, +Sign --> fabs Mag
3106 Value *Fabs = Builder.CreateFAbs(Mag, II);
3107 return replaceInstUsesWith(*II, Fabs);
3108 }
3109
3110 // Propagate sign argument through nested calls:
3111 // copysign Mag, (copysign ?, X) --> copysign Mag, X
3112 Value *X;
3114 Value *CopySign =
3115 Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
3116 return replaceInstUsesWith(*II, CopySign);
3117 }
3118
3119 // Clear sign-bit of constant magnitude:
3120 // copysign -MagC, X --> copysign MagC, X
3121 // TODO: Support constant folding for fabs
3122 const APFloat *MagC;
3123 if (match(Mag, m_APFloat(MagC)) && MagC->isNegative()) {
3124 APFloat PosMagC = *MagC;
3125 PosMagC.clearSign();
3126 return replaceOperand(*II, 0, ConstantFP::get(Mag->getType(), PosMagC));
3127 }
3128
3129 // Peek through changes of magnitude's sign-bit. This call rewrites those:
3130 // copysign (fabs X), Sign --> copysign X, Sign
3131 // copysign (fneg X), Sign --> copysign X, Sign
3132 if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
3133 return replaceOperand(*II, 0, X);
3134
3135 Type *SignEltTy = Sign->getType()->getScalarType();
3136
3137 Value *CastSrc;
3138 if (match(Sign,
3140 CastSrc->getType()->isIntOrIntVectorTy() &&
3142 KnownBits Known(SignEltTy->getPrimitiveSizeInBits());
3144 APInt::getSignMask(Known.getBitWidth()), Known,
3145 SQ))
3146 return II;
3147 }
3148
3149 break;
3150 }
3151 case Intrinsic::fabs: {
3152 Value *Cond, *TVal, *FVal;
3153 Value *Arg = II->getArgOperand(0);
3154 Value *X;
3155 // fabs (-X) --> fabs (X)
3156 if (match(Arg, m_FNeg(m_Value(X)))) {
3157 Value *Fabs = Builder.CreateFAbs(X, II);
3158 return replaceInstUsesWith(CI, Fabs);
3159 }
3160
3161 if (match(Arg, m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
3162 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
3163 if (Arg->hasOneUse() ? (isa<Constant>(TVal) || isa<Constant>(FVal))
3164 : (isa<Constant>(TVal) && isa<Constant>(FVal))) {
3165 CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
3166 CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
3167 SelectInst *SI = SelectInst::Create(Cond, AbsT, AbsF);
3168 SI->setFastMathFlags(II->getFastMathFlags() |
3169 cast<SelectInst>(Arg)->getFastMathFlags());
3170 // Can't copy nsz to select, as even with the nsz flag the fabs result
3171 // always has the sign bit unset.
3172 SI->setHasNoSignedZeros(false);
3173 return SI;
3174 }
3175 // fabs (select Cond, -FVal, FVal) --> fabs FVal
3176 if (match(TVal, m_FNeg(m_Specific(FVal))))
3177 return replaceOperand(*II, 0, FVal);
3178 // fabs (select Cond, TVal, -TVal) --> fabs TVal
3179 if (match(FVal, m_FNeg(m_Specific(TVal))))
3180 return replaceOperand(*II, 0, TVal);
3181 }
3182
3183 Value *Magnitude, *Sign;
3184 if (match(II->getArgOperand(0),
3185 m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
3186 // fabs (copysign x, y) -> (fabs x)
3187 Value *AbsSign = Builder.CreateFAbs(Magnitude, II);
3188 return replaceInstUsesWith(*II, AbsSign);
3189 }
3190
3191 [[fallthrough]];
3192 }
3193 case Intrinsic::ceil:
3194 case Intrinsic::floor:
3195 case Intrinsic::round:
3196 case Intrinsic::roundeven:
3197 case Intrinsic::nearbyint:
3198 case Intrinsic::rint:
3199 case Intrinsic::trunc: {
3200 Value *ExtSrc;
3201 if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
3202 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
3203 Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
3204 return new FPExtInst(NarrowII, II->getType());
3205 }
3206 break;
3207 }
3208 case Intrinsic::cos:
3209 case Intrinsic::amdgcn_cos:
3210 case Intrinsic::cosh: {
3211 Value *X, *Sign;
3212 Value *Src = II->getArgOperand(0);
3213 if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X))) ||
3214 match(Src, m_CopySign(m_Value(X), m_Value(Sign)))) {
3215 // f(-x) --> f(x)
3216 // f(fabs(x)) --> f(x)
3217 // f(copysign(x, y)) --> f(x)
3218 // for f in {cos, cosh}
3219 return replaceOperand(*II, 0, X);
3220 }
3221 break;
3222 }
3223 case Intrinsic::sin:
3224 case Intrinsic::amdgcn_sin:
3225 case Intrinsic::sinh:
3226 case Intrinsic::tan:
3227 case Intrinsic::tanh: {
3228 Value *X;
3229 if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
3230 // f(-x) --> -f(x)
3231 // for f in {sin, sinh, tan, tanh}
3232 Value *NewFunc = Builder.CreateUnaryIntrinsic(IID, X, II);
3233 return UnaryOperator::CreateFNegFMF(NewFunc, II);
3234 }
3235 break;
3236 }
3237 case Intrinsic::ldexp: {
3238 Value *Src = II->getArgOperand(0);
3239 Value *Exp = II->getArgOperand(1);
3240
3241 // ldexp(x, K) -> fmul x, 2^K
3242 uint64_t ConstExp;
3243 if (match(Exp, m_ConstantInt(ConstExp))) {
3244 const fltSemantics &FPTy =
3245 Src->getType()->getScalarType()->getFltSemantics();
3246
3247 APFloat Scaled = scalbn(APFloat::getOne(FPTy), static_cast<int>(ConstExp),
3249 if (!Scaled.isZero() && !Scaled.isInfinity()) {
3250 // Skip overflow and underflow cases.
3251 Constant *FPConst = ConstantFP::get(Src->getType(), Scaled);
3252 return BinaryOperator::CreateFMulFMF(Src, FPConst, II);
3253 }
3254 }
3255
3256 // ldexp(ldexp(x, a), b) -> ldexp(x, sadd.sat(a, b))
3257 //
3258 // A danger is if the first ldexp would overflow to infinity or underflow to
3259 // zero, but the combined exponent avoids it.
3260 //
3261 // We ignore this with reassoc, or if we know both exponents have the same
3262 // sign (since then we'd just double down on the over/underflow which would
3263 // occur anyway).
3264 //
3265 // ldexp can take arbitrary integer types, so we also need to ensure that
3266 // our exponent type is wide enough so that if sadd.sat(a, b) saturates,
3267 // then ldexp at the saturated exponent saturates to inf or zero as well.
3268 //
3269 // TODO: Could do better if we had range tracking for the input value
3270 // exponent. Also could broaden sign check to cover == 0 case.
3271 Value *InnerSrc;
3272 Value *InnerExp;
3274 m_Value(InnerSrc), m_Value(InnerExp)))) &&
3275 Exp->getType() == InnerExp->getType()) {
3276 FastMathFlags FMF = II->getFastMathFlags();
3277 FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
3278
3279 if (ldexpSaturatingAddIsSafe(II->getType(), Exp->getType()) &&
3280 ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
3281 signBitMustBeTheSame(Exp, InnerExp, SQ.getWithInstruction(II)))) {
3282 Value *NewExp =
3283 Builder.CreateBinaryIntrinsic(Intrinsic::sadd_sat, InnerExp, Exp);
3284 II->setArgOperand(1, NewExp);
3285 II->setFastMathFlags(InnerFlags); // Or the inner flags.
3286 return replaceOperand(*II, 0, InnerSrc);
3287 }
3288 }
3289
3290 // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3291 // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
3292 Value *ExtSrc;
3293 if (match(Exp, m_ZExt(m_Value(ExtSrc))) &&
3294 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3295 Value *Select =
3296 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0),
3297 ConstantFP::get(II->getType(), 1.0));
3299 }
3300 if (match(Exp, m_SExt(m_Value(ExtSrc))) &&
3301 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3302 Value *Select =
3303 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5),
3304 ConstantFP::get(II->getType(), 1.0));
3306 }
3307
3308 // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3309 // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3310 //
3311 // TODO: If we cared, should insert a canonicalize for x
3312 Value *SelectCond, *SelectLHS, *SelectRHS;
3313 if (match(II->getArgOperand(1),
3314 m_OneUse(m_Select(m_Value(SelectCond), m_Value(SelectLHS),
3315 m_Value(SelectRHS))))) {
3316 Value *NewLdexp = nullptr;
3317 Value *Select = nullptr;
3318 if (match(SelectRHS, m_ZeroInt())) {
3319 NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
3320 Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
3321 } else if (match(SelectLHS, m_ZeroInt())) {
3322 NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
3323 Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
3324 }
3325
3326 if (NewLdexp) {
3327 Select->takeName(II);
3328 return replaceInstUsesWith(*II, Select);
3329 }
3330 }
3331
3332 break;
3333 }
3334 case Intrinsic::ptrauth_auth:
3335 case Intrinsic::ptrauth_resign: {
3336 // (sign|resign) + (auth|resign) can be folded by omitting the middle
3337 // sign+auth component if the key and discriminator match.
3338 bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3339 Value *Ptr = II->getArgOperand(0);
3340 Value *Key = II->getArgOperand(1);
3341 Value *Disc = II->getArgOperand(2);
3342 Value *DS = nullptr;
3343 if (auto Bundle = II->getOperandBundle(LLVMContext::OB_deactivation_symbol))
3344 DS = Bundle->Inputs[0];
3345
3346 // AuthKey will be the key we need to end up authenticating against in
3347 // whatever we replace this sequence with.
3348 Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
3349 if (const auto *CI = dyn_cast<CallBase>(Ptr)) {
3350 Value *OtherDS = nullptr;
3351 if (auto Bundle =
3353 OtherDS = Bundle->Inputs[0];
3354 if (DS != OtherDS)
3355 break;
3356
3357 if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3358 if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
3359 break;
3360 } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3361 // The resign intrinsic does not support deactivation symbols.
3362 assert(!DS);
3363 if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
3364 break;
3365 AuthKey = CI->getArgOperand(1);
3366 AuthDisc = CI->getArgOperand(2);
3367 } else
3368 break;
3369 BasePtr = CI->getArgOperand(0);
3370 } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Ptr)) {
3371 // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3372 // our purposes, so check for that too.
3373 const auto *CPA = dyn_cast<ConstantPtrAuth>(PtrToInt->getOperand(0));
3374 if (!CPA || DS || !CPA->isKnownCompatibleWith(Key, Disc, DL))
3375 break;
3376
3377 // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3378 if (NeedSign && isa<ConstantInt>(II->getArgOperand(4))) {
3379 auto *SignKey = cast<ConstantInt>(II->getArgOperand(3));
3380 auto *SignDisc = cast<ConstantInt>(II->getArgOperand(4));
3381 auto *Null = ConstantPointerNull::get(Builder.getPtrTy());
3382 auto *NewCPA = ConstantPtrAuth::get(CPA->getPointer(), SignKey,
3383 SignDisc, /*AddrDisc=*/Null,
3384 /*DeactivationSymbol=*/Null);
3386 *II, ConstantExpr::getPointerCast(NewCPA, II->getType()));
3387 return eraseInstFromFunction(*II);
3388 }
3389
3390 // auth(ptrauth(p,k,d),k,d) -> p
3391 BasePtr = Builder.CreatePtrToInt(CPA->getPointer(), II->getType());
3392 } else
3393 break;
3394
3395 unsigned NewIntrin;
3396 if (AuthKey && NeedSign) {
3397 // resign(0,1) + resign(1,2) = resign(0, 2)
3398 NewIntrin = Intrinsic::ptrauth_resign;
3399 } else if (AuthKey) {
3400 // resign(0,1) + auth(1) = auth(0)
3401 NewIntrin = Intrinsic::ptrauth_auth;
3402 } else if (NeedSign) {
3403 // sign(0) + resign(0, 1) = sign(1)
3404 NewIntrin = Intrinsic::ptrauth_sign;
3405 } else {
3406 // sign(0) + auth(0) = nop
3407 replaceInstUsesWith(*II, BasePtr);
3408 return eraseInstFromFunction(*II);
3409 }
3410
3411 SmallVector<Value *, 4> CallArgs;
3412 CallArgs.push_back(BasePtr);
3413 if (AuthKey) {
3414 CallArgs.push_back(AuthKey);
3415 CallArgs.push_back(AuthDisc);
3416 }
3417
3418 if (NeedSign) {
3419 CallArgs.push_back(II->getArgOperand(3));
3420 CallArgs.push_back(II->getArgOperand(4));
3421 }
3422
3423 std::vector<OperandBundleDef> Bundles;
3424 if (DS)
3425 Bundles.push_back(OperandBundleDef("deactivation-symbol", DS));
3426
3427 Function *NewFn =
3428 Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
3429 return CallInst::Create(NewFn, CallArgs, Bundles);
3430 }
3431 case Intrinsic::arm_neon_vtbl1:
3432 case Intrinsic::arm_neon_vtbl2:
3433 case Intrinsic::arm_neon_vtbl3:
3434 case Intrinsic::arm_neon_vtbl4:
3435 case Intrinsic::aarch64_neon_tbl1:
3436 case Intrinsic::aarch64_neon_tbl2:
3437 case Intrinsic::aarch64_neon_tbl3:
3438 case Intrinsic::aarch64_neon_tbl4:
3439 return simplifyNeonTbl(*II, *this, /*IsExtension=*/false);
3440 case Intrinsic::arm_neon_vtbx1:
3441 case Intrinsic::arm_neon_vtbx2:
3442 case Intrinsic::arm_neon_vtbx3:
3443 case Intrinsic::arm_neon_vtbx4:
3444 case Intrinsic::aarch64_neon_tbx1:
3445 case Intrinsic::aarch64_neon_tbx2:
3446 case Intrinsic::aarch64_neon_tbx3:
3447 case Intrinsic::aarch64_neon_tbx4:
3448 return simplifyNeonTbl(*II, *this, /*IsExtension=*/true);
3449
3450 case Intrinsic::arm_neon_vmulls:
3451 case Intrinsic::arm_neon_vmullu:
3452 case Intrinsic::aarch64_neon_smull:
3453 case Intrinsic::aarch64_neon_umull: {
3454 Value *Arg0 = II->getArgOperand(0);
3455 Value *Arg1 = II->getArgOperand(1);
3456
3457 // Handle mul by zero first:
3459 return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3460 }
3461
3462 // Check for constant LHS & RHS - in this case we just simplify.
3463 bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
3464 IID == Intrinsic::aarch64_neon_umull);
3465 VectorType *NewVT = cast<VectorType>(II->getType());
3466 if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3467 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3468 Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
3469 Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
3470 return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
3471 }
3472
3473 // Couldn't simplify - canonicalize constant to the RHS.
3474 std::swap(Arg0, Arg1);
3475 }
3476
3477 // Handle mul by one:
3478 if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3479 if (ConstantInt *Splat =
3480 dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3481 if (Splat->isOne())
3482 return CastInst::CreateIntegerCast(Arg0, II->getType(),
3483 /*isSigned=*/!Zext);
3484
3485 break;
3486 }
3487 case Intrinsic::arm_neon_aesd:
3488 case Intrinsic::arm_neon_aese:
3489 case Intrinsic::aarch64_crypto_aesd:
3490 case Intrinsic::aarch64_crypto_aese:
3491 case Intrinsic::aarch64_sve_aesd:
3492 case Intrinsic::aarch64_sve_aese: {
3493 Value *DataArg = II->getArgOperand(0);
3494 Value *KeyArg = II->getArgOperand(1);
3495
3496 // Accept zero on either operand.
3497 if (!match(KeyArg, m_ZeroInt()))
3498 std::swap(KeyArg, DataArg);
3499
3500 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
3501 Value *Data, *Key;
3502 if (match(KeyArg, m_ZeroInt()) &&
3503 match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3504 replaceOperand(*II, 0, Data);
3505 replaceOperand(*II, 1, Key);
3506 return II;
3507 }
3508 break;
3509 }
3510 case Intrinsic::arm_neon_vshifts:
3511 case Intrinsic::arm_neon_vshiftu:
3512 case Intrinsic::aarch64_neon_sshl:
3513 case Intrinsic::aarch64_neon_ushl:
3514 return foldNeonShift(II, *this);
3515 case Intrinsic::hexagon_V6_vandvrt:
3516 case Intrinsic::hexagon_V6_vandvrt_128B: {
3517 // Simplify Q -> V -> Q conversion.
3518 if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3519 Intrinsic::ID ID0 = Op0->getIntrinsicID();
3520 if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3521 ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3522 break;
3523 Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
3524 uint64_t Bytes1 = computeKnownBits(Bytes, Op0).One.getZExtValue();
3525 uint64_t Mask1 = computeKnownBits(Mask, II).One.getZExtValue();
3526 // Check if every byte has common bits in Bytes and Mask.
3527 uint64_t C = Bytes1 & Mask1;
3528 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
3529 return replaceInstUsesWith(*II, Op0->getArgOperand(0));
3530 }
3531 break;
3532 }
3533 case Intrinsic::stackrestore: {
// Coarse category for an instruction scanned next to a stackrestore. The
// values are produced by the Classify lambda in this case.
3534 enum class ClassifyResult {
// None of the categories below.
3535 None,
// An alloca instruction.
3536 Alloca,
// A call to the stackrestore intrinsic.
3537 StackRestore,
// A non-intrinsic call, or an intrinsic call that may have side effects.
3538 CallWithSideEffects,
3539 };
// Maps an instruction to a ClassifyResult. Only allocas and CallInst are
// inspected; anything else is reported as None.
// The lambda captures nothing, so the CI and II declared inside it are its own
// locals and are unrelated to the same-named variables used elsewhere in this
// case.
3540 auto Classify = [](const Instruction *I) {
3541 if (isa<AllocaInst>(I))
3542 return ClassifyResult::Alloca;
3543
3544 if (auto *CI = dyn_cast<CallInst>(I)) {
3545 if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
// stackrestore gets its own category and is tested before the generic
// side-effect query below.
3546 if (II->getIntrinsicID() == Intrinsic::stackrestore)
3547 return ClassifyResult::StackRestore;
3548
3549 if (II->mayHaveSideEffects())
3550 return ClassifyResult::CallWithSideEffects;
// An intrinsic call that reports no side effects falls through to the
// final `None` return.
3551 } else {
3552 // Consider all non-intrinsic calls to be side effects
3553 return ClassifyResult::CallWithSideEffects;
3554 }
3555 }
3556
// Reached for non-alloca, non-call instructions and for intrinsic calls
// without side effects.
3557 return ClassifyResult::None;
3558 };
3559
3560 // If the stacksave and the stackrestore are in the same BB, and there is
3561 // no intervening call, alloca, or stackrestore of a different stacksave,
3562 // remove the restore. This can happen when variable allocas are DCE'd.
3563 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3564 if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3565 SS->getParent() == II->getParent()) {
3566 BasicBlock::iterator BI(SS);
3567 bool CannotRemove = false;
3568 for (++BI; &*BI != II; ++BI) {
3569 switch (Classify(&*BI)) {
3570 case ClassifyResult::None:
3571 // So far so good, look at next instructions.
3572 break;
3573
3574 case ClassifyResult::StackRestore:
3575 // If we found an intervening stackrestore for a different
3576 // stacksave, we can't remove the stackrestore. Otherwise, continue.
3577 if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
3578 CannotRemove = true;
3579 break;
3580
3581 case ClassifyResult::Alloca:
3582 case ClassifyResult::CallWithSideEffects:
3583 // If we found an alloca, a non-intrinsic call, or an intrinsic
3584 // call with side effects, we can't remove the stackrestore.
3585 CannotRemove = true;
3586 break;
3587 }
3588 if (CannotRemove)
3589 break;
3590 }
3591
3592 if (!CannotRemove)
3593 return eraseInstFromFunction(CI);
3594 }
3595 }
3596
3597 // Scan down this block to see if there is another stack restore in the
3598 // same block without an intervening call/alloca.
3600 Instruction *TI = II->getParent()->getTerminator();
3601 bool CannotRemove = false;
3602 for (++BI; &*BI != TI; ++BI) {
3603 switch (Classify(&*BI)) {
3604 case ClassifyResult::None:
3605 // So far so good, look at next instructions.
3606 break;
3607
3608 case ClassifyResult::StackRestore:
3609 // If there is a stackrestore below this one, remove this one.
3610 return eraseInstFromFunction(CI);
3611
3612 case ClassifyResult::Alloca:
3613 case ClassifyResult::CallWithSideEffects:
3614 // If we found an alloca, a non-intrinsic call, or an intrinsic call
3615 // with side effects (such as llvm.stacksave and llvm.read_register),
3616 // we can't remove the stack restore.
3617 CannotRemove = true;
3618 break;
3619 }
3620 if (CannotRemove)
3621 break;
3622 }
3623
3624 // If the stack restore is in a return, resume, or unwind block and if there
3625 // are no allocas or calls between the restore and the return, nuke the
3626 // restore.
3627 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3628 return eraseInstFromFunction(CI);
3629 break;
3630 }
3631 case Intrinsic::lifetime_end:
3632 // Asan needs to poison memory to detect invalid access which is possible
3633 // even for empty lifetime range.
3634 if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3635 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
3636 II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress) ||
3637 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag))
3638 break;
3639
3640 if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
3641 return I.getIntrinsicID() == Intrinsic::lifetime_start;
3642 }))
3643 return nullptr;
3644 break;
3645 case Intrinsic::assume: {
3646 for (auto [Idx, OBU] : llvm::enumerate(II->operand_bundles())) {
3647 switch (getBundleAttrFromOBU(OBU)) {
3648 case BundleAttr::None:
3649 llvm_unreachable("Unexpected Attribute");
3650 case BundleAttr::Align: {
3651 // Try to remove redundant alignment assumptions.
3652 auto [Ptr, _, Alignment, Offset] = getAssumeAlignInfo(OBU);
3653
3654 if (!Alignment || !Offset || *Offset != 0)
3655 break;
3656
3657 // Remove align 1 and non-power-of-two bundles; they don't add any
3658 // useful information.
3659 if (*Alignment == 1 || !isPowerOf2_64(*Alignment))
3661
3662 // Don't try to remove align assumptions for pointers derived from
3663 // arguments. We might lose information if the function gets inlined and
3664 // the align argument attribute disappears.
3665 Value *UO = getUnderlyingObject(Ptr);
3666 if (!UO || isa<Argument>(UO))
3667 break;
3668
3669 // Compute known bits for the pointer and drop the assume if the
3670 // known alignment isn't increased by it.
3671 if (computeKnownBits(Ptr, II).countMinTrailingZeros() <
3672 Log2_64(*Alignment))
3673 continue;
3675 }
3676
3677 case BundleAttr::Dereferenceable: {
3678 auto [Ptr, _, Count] = getAssumeDereferenceableInfo(OBU);
3679
3680 if (Count && *Count == 0)
3682 break;
3683 }
3684
3685 case BundleAttr::Ignore:
3687
3688 case BundleAttr::NonNull: {
3689 auto [Ptr] = llvm::getAssumeNonNullInfo(OBU);
3690
3691 // Drop assume if we can prove nonnull without it
3692 if (isKnownNonZero(Ptr, getSimplifyQuery().getWithInstruction(II)))
3694
3695 // Fold the assume into metadata if it's valid at the load
3696 if (auto *LI = dyn_cast<LoadInst>(Ptr);
3697 LI &&
3698 isValidAssumeForContext(II, LI, &DT, /*AllowEphemerals=*/true)) {
3699 MDNode *MD = MDNode::get(II->getContext(), {});
3700 LI->setMetadata(LLVMContext::MD_nonnull, MD);
3701 LI->setMetadata(LLVMContext::MD_noundef, MD);
3703 }
3704
3705 if (auto *GEP = dyn_cast<GEPOperator>(Ptr);
3706 GEP && GEP->isInBounds() &&
3707 !NullPointerIsDefined(II->getFunction(),
3708 Ptr->getType()->getPointerAddressSpace())) {
3709 Builder.CreateNonnullAssumption(GEP->stripInBoundsOffsets());
3711 }
3712
3713 // TODO: apply nonnull return attributes to calls and invokes
3714 break;
3715 }
3716
3717 case BundleAttr::SeparateStorage: {
3718 auto [Ptr1, Ptr2] = getAssumeSeparateStorageInfo(OBU);
3719 // Separate storage assumptions apply to the underlying allocations, not
3720 // any particular pointer within them. When evaluating the hints for AA
3721 // purposes we getUnderlyingObject them; by precomputing the answers
3722 // here we can avoid having to do so repeatedly there.
3723 auto MaybeSimplifyHint = [&](const Use &U) {
3724 Value *Hint = U.get();
3725 // Not having a limit is safe because InstCombine removes unreachable
3726 // code.
3727 Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
3728 if (Hint != UnderlyingObject)
3729 replaceUse(const_cast<Use &>(U), UnderlyingObject);
3730 };
3731 MaybeSimplifyHint(Ptr1);
3732 MaybeSimplifyHint(Ptr2);
3733 } break;
3734
3735 // TODO: Drop these assumes when they are redundant
3736 case BundleAttr::DereferenceableOrNull:
3737 case BundleAttr::NoUndef:
3738 break;
3739
3740 // This cannot be simplified
3741 case BundleAttr::Cold:
3742 break;
3743 }
3744 }
3745
3746 // If the assume has operand bundles, the folds below will never work, so
3747 // don't bother trying.
3748 if (II->hasOperandBundles())
3749 break;
3750
3751 Value *IIOperand = II->getArgOperand(0);
3752
3753 // Canonicalize assume(a && b) -> assume(a); assume(b);
3754 // Note: New assumption intrinsics created here are registered by
3755 // the InstCombineIRInserter object.
3756 Value *A, *B;
3757 if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
3758 Builder.CreateAssumption(A);
3759 Builder.CreateAssumption(B);
3760 return eraseInstFromFunction(*II);
3761 }
3762 // assume(!(a || b)) -> assume(!a); assume(!b);
3763 if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
3764 Builder.CreateAssumption(Builder.CreateNot(A));
3765 Builder.CreateAssumption(Builder.CreateNot(B));
3766 return eraseInstFromFunction(*II);
3767 }
3768
3769 // Convert nonnull assume like:
3770 // %A = icmp ne i32* %PTR, null
3771 // call void @llvm.assume(i1 %A)
3772 // into
3773 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3774 if (match(IIOperand,
3776 A->getType()->isPointerTy()) {
3777 Builder.CreateNonnullAssumption(A);
3778 return eraseInstFromFunction(*II);
3779 }
3780
3781 // Convert alignment assume like:
3782 // %B = ptrtoint i32* %A to i64
3783 // %C = and i64 %B, Constant
3784 // %D = icmp eq i64 %C, 0
3785 // call void @llvm.assume(i1 %D)
3786 // into
3787 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
3788 uint64_t AlignMask = 1;
3789 if ((match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
3790 match(IIOperand,
3792 m_And(m_Value(A), m_ConstantInt(AlignMask)),
3793 m_Zero())))) {
3794 if (isPowerOf2_64(AlignMask + 1)) {
3795 uint64_t Offset = 0;
3797 if (match(A, m_PtrToIntOrAddr(m_Value(A)))) {
3798 /// Note: this doesn't preserve the offset information but merges
3799 /// offset and alignment.
3800 /// TODO: we can generate a GEP instead of merging the alignment with
3801 /// the offset.
3802 Builder.CreateAlignmentAssumption(getDataLayout(), A,
3803 MinAlign(Offset, AlignMask + 1));
3804 return eraseInstFromFunction(*II);
3805 }
3806 }
3807 }
3808
3809 // If there is a dominating assume with the same condition as this one,
3810 // then this one is redundant, and should be removed.
3811 KnownBits Known(1);
3812 computeKnownBits(IIOperand, Known, II);
3813 if (Known.isAllOnes())
3814 return eraseInstFromFunction(*II);
3815
3816 // assume(false) is unreachable.
3817 if (match(IIOperand, m_CombineOr(m_Zero(), m_Undef()))) {
3819 return eraseInstFromFunction(*II);
3820 }
3821
3822 // Update the cache of affected values for this assumption (we might be
3823 // here because we just simplified the condition).
3824 AC.updateAffectedValues(cast<AssumeInst>(II));
3825 break;
3826 }
3827 case Intrinsic::experimental_guard: {
3828 // Is this guard followed by another guard? We scan forward over a small
3829 // fixed window of instructions to handle common cases with conditions
3830 // computed between guards.
3831 Instruction *NextInst = II->getNextNode();
3832 for (unsigned i = 0; i < GuardWideningWindow; i++) {
3833 // Note: Using context-free form to avoid compile time blow up
3834 if (!isSafeToSpeculativelyExecute(NextInst))
3835 break;
3836 NextInst = NextInst->getNextNode();
3837 }
3838 Value *NextCond = nullptr;
3839 if (match(NextInst,
3841 Value *CurrCond = II->getArgOperand(0);
3842
3843 // Remove a guard that is immediately preceded by an identical guard.
3844 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3845 if (CurrCond != NextCond) {
3846 Instruction *MoveI = II->getNextNode();
3847 while (MoveI != NextInst) {
3848 auto *Temp = MoveI;
3849 MoveI = MoveI->getNextNode();
3850 Temp->moveBefore(II->getIterator());
3851 }
3852 replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
3853 }
3854 eraseInstFromFunction(*NextInst);
3855 return II;
3856 }
3857 break;
3858 }
3859 case Intrinsic::vector_insert: {
3860 Value *Vec = II->getArgOperand(0);
3861 Value *SubVec = II->getArgOperand(1);
3862 Value *Idx = II->getArgOperand(2);
3863 auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
3864 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
3865 auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
3866
3867 // Only canonicalize if the destination vector, Vec, and SubVec are all
3868 // fixed vectors.
3869 if (DstTy && VecTy && SubVecTy) {
3870 unsigned DstNumElts = DstTy->getNumElements();
3871 unsigned VecNumElts = VecTy->getNumElements();
3872 unsigned SubVecNumElts = SubVecTy->getNumElements();
3873 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3874
3875 // An insert that entirely overwrites Vec with SubVec is a nop.
3876 if (VecNumElts == SubVecNumElts)
3877 return replaceInstUsesWith(CI, SubVec);
3878
3879 // Widen SubVec into a vector of the same width as Vec, since
3880 // shufflevector requires the two input vectors to be the same width.
3881 // Elements beyond the bounds of SubVec within the widened vector are
3882 // undefined.
3883 SmallVector<int, 8> WidenMask;
3884 unsigned i;
3885 for (i = 0; i != SubVecNumElts; ++i)
3886 WidenMask.push_back(i);
3887 for (; i != VecNumElts; ++i)
3888 WidenMask.push_back(PoisonMaskElem);
3889
3890 Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
3891
3893 for (unsigned i = 0; i != IdxN; ++i)
3894 Mask.push_back(i);
3895 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3896 Mask.push_back(i);
3897 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3898 Mask.push_back(i);
3899
3900 Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
3901 return replaceInstUsesWith(CI, Shuffle);
3902 }
3903 break;
3904 }
3905 case Intrinsic::vector_extract: {
3906 Value *Vec = II->getArgOperand(0);
3907 Value *Idx = II->getArgOperand(1);
3908
3909 Type *ReturnType = II->getType();
3910 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3911 // ExtractIdx)
3912 unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
3913 Value *InsertTuple, *InsertIdx, *InsertValue;
3915 m_Value(InsertValue),
3916 m_Value(InsertIdx))) &&
3917 InsertValue->getType() == ReturnType) {
3918 unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
3919 // Case where we get the same index right after setting it.
3920 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3921 // InsertValue
3922 if (ExtractIdx == Index)
3923 return replaceInstUsesWith(CI, InsertValue);
3924 // If we are getting a different index than what was set in the
3925 // insert.vector intrinsic, we can just set the input tuple to the one up
3926 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3927 // InsertIndex), ExtractIndex)
3928 // --> extract.vector(InsertTuple, ExtractIndex)
3929 else
3930 return replaceOperand(CI, 0, InsertTuple);
3931 }
3932
3933 ConstantInt *ALMUpperBound;
3935 m_Value(), m_ConstantInt(ALMUpperBound)))) {
3936 const auto &Attrs = II->getFunction()->getAttributes().getFnAttrs();
3937 unsigned VScaleMin = Attrs.getVScaleRangeMin();
3938 unsigned ScaleFactor =
3939 cast<VectorType>(ReturnType)->isScalableTy() ? VScaleMin : 1;
3940 if (ExtractIdx * ScaleFactor >= ALMUpperBound->getZExtValue())
3941 return replaceInstUsesWith(CI,
3942 ConstantVector::getNullValue(ReturnType));
3943 }
3944
3945 auto *DstTy = dyn_cast<VectorType>(ReturnType);
3946 auto *VecTy = dyn_cast<VectorType>(Vec->getType());
3947
3948 if (DstTy && VecTy) {
3949 auto DstEltCnt = DstTy->getElementCount();
3950 auto VecEltCnt = VecTy->getElementCount();
3951 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3952
3953 // Extracting the entirety of Vec is a nop.
3954 if (DstEltCnt == VecTy->getElementCount()) {
3955 replaceInstUsesWith(CI, Vec);
3956 return eraseInstFromFunction(CI);
3957 }
3958
3959 // Only canonicalize to shufflevector if the destination vector and
3960 // Vec are fixed vectors.
3961 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
3962 break;
3963
3965 for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
3966 Mask.push_back(IdxN + i);
3967
3968 Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
3969 return replaceInstUsesWith(CI, Shuffle);
3970 }
3971 break;
3972 }
3973 case Intrinsic::experimental_vp_reverse: {
3974 Value *X;
3975 Value *Vec = II->getArgOperand(0);
3976 Value *Mask = II->getArgOperand(1);
3977 if (!match(Mask, m_AllOnes()))
3978 break;
3979 Value *EVL = II->getArgOperand(2);
3980 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
3981 // rev(unop rev(X)) --> unop X
3982 if (match(Vec,
3984 m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
3985 auto *OldUnOp = cast<UnaryOperator>(Vec);
3987 OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
3988 II->getIterator());
3989 return replaceInstUsesWith(CI, NewUnOp);
3990 }
3991 break;
3992 }
3993 case Intrinsic::vector_reduce_or:
3994 case Intrinsic::vector_reduce_and: {
3995 // Canonicalize logical or/and reductions:
3996 // Or reduction for i1 is represented as:
3997 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3998 // %res = cmp ne iReduxWidth %val, 0
3999 // And reduction for i1 is represented as:
4000 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
4001 // %res = cmp eq iReduxWidth %val, 11111
4002 Value *Arg = II->getArgOperand(0);
4003 Value *Vect;
4004
4005 if (Value *NewOp =
4006 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4007 replaceUse(II->getOperandUse(0), NewOp);
4008 return II;
4009 }
4010
4011 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4012 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4013 if (FTy->getElementType() == Builder.getInt1Ty()) {
4014 Value *Res = Builder.CreateBitCast(
4015 Vect, Builder.getIntNTy(FTy->getNumElements()));
4016 if (IID == Intrinsic::vector_reduce_and) {
4017 Res = Builder.CreateICmpEQ(
4019 } else {
4020 assert(IID == Intrinsic::vector_reduce_or &&
4021 "Expected or reduction.");
4022 Res = Builder.CreateIsNotNull(Res);
4023 }
4024 if (Arg != Vect)
4025 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4026 II->getType());
4027 return replaceInstUsesWith(CI, Res);
4028 }
4029 }
4030 [[fallthrough]];
4031 }
4032 case Intrinsic::vector_reduce_add: {
4033 if (IID == Intrinsic::vector_reduce_add) {
4034 // Convert vector_reduce_add(ZExt(<n x i1>)) to
4035 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4036 // Convert vector_reduce_add(SExt(<n x i1>)) to
4037 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4038 // Convert vector_reduce_add(<n x i1>) to
4039 // Trunc(ctpop(bitcast <n x i1> to in)).
4040 Value *Arg = II->getArgOperand(0);
4041 Value *Vect;
4042
4043 if (Value *NewOp =
4044 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4045 replaceUse(II->getOperandUse(0), NewOp);
4046 return II;
4047 }
4048
4049 // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
4050 if (Value *Splat = getSplatValue(Arg)) {
4051 ElementCount VecToReduceCount =
4052 cast<VectorType>(Arg->getType())->getElementCount();
4053 if (VecToReduceCount.isFixed()) {
4054 unsigned VectorSize = VecToReduceCount.getFixedValue();
4055 return BinaryOperator::CreateMul(
4056 Splat,
4057 ConstantInt::get(Splat->getType(), VectorSize, /*IsSigned=*/false,
4058 /*ImplicitTrunc=*/true));
4059 }
4060 }
4061
4062 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4063 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4064 if (FTy->getElementType() == Builder.getInt1Ty()) {
4065 Value *V = Builder.CreateBitCast(
4066 Vect, Builder.getIntNTy(FTy->getNumElements()));
4067 Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
4068 Res = Builder.CreateZExtOrTrunc(Res, II->getType());
4069 if (Arg != Vect &&
4070 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
4071 Res = Builder.CreateNeg(Res);
4072 return replaceInstUsesWith(CI, Res);
4073 }
4074 }
4075 }
4076 [[fallthrough]];
4077 }
4078 case Intrinsic::vector_reduce_xor: {
4079 if (IID == Intrinsic::vector_reduce_xor) {
4080 // Exclusive disjunction reduction over the vector with
4081 // (potentially-extended) i1 element type is actually a
4082 // (potentially-extended) arithmetic `add` reduction over the original
4083 // non-extended value:
4084 // vector_reduce_xor(?ext(<n x i1>))
4085 // -->
4086 // ?ext(vector_reduce_add(<n x i1>))
4087 Value *Arg = II->getArgOperand(0);
4088 Value *Vect;
4089
4090 if (Value *NewOp =
4091 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4092 replaceUse(II->getOperandUse(0), NewOp);
4093 return II;
4094 }
4095
4096 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4097 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4098 if (VTy->getElementType() == Builder.getInt1Ty()) {
4099 Value *Res = Builder.CreateAddReduce(Vect);
4100 if (Arg != Vect)
4101 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4102 II->getType());
4103 return replaceInstUsesWith(CI, Res);
4104 }
4105 }
4106 }
4107 [[fallthrough]];
4108 }
4109 case Intrinsic::vector_reduce_mul: {
4110 if (IID == Intrinsic::vector_reduce_mul) {
4111 Value *Arg = II->getArgOperand(0);
4112
4113 if (Value *NewOp =
4114 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4115 replaceUse(II->getOperandUse(0), NewOp);
4116 return II;
4117 }
4118
4119 // vector_reduce_mul(zext(<n x i1>)), or
4120 // vector_reduce_mul(sext(<n x i1>)) (if n is even) -->
4121 // zext(vector_reduce_and(<n x i1>)).
4122 // (The sext case doesn't work if n is odd because multiplying an odd
4123 // number of -1's produces -1, not 1.)
4124 Value *Vect;
4125 bool IsZext = match(Arg, m_ZExt(m_Value(Vect))) &&
4126 Vect->getType()->isIntOrIntVectorTy(1);
4127 bool IsSext =
4128 match(Arg, m_SExt(m_Value(Vect))) &&
4129 Vect->getType()->isIntOrIntVectorTy(1) &&
4130 cast<VectorType>(Vect->getType())->getElementCount().isKnownEven();
4131 if (IsZext || IsSext) {
4132 Value *Res = Builder.CreateAndReduce(Vect);
4133 return CastInst::Create(Instruction::ZExt, Res, II->getType());
4134 }
4135
4136 // vector_reduce_mul(<n x i1>) --> vector_reduce_and(<n x i1>)
4137 if (Arg->getType()->isIntOrIntVectorTy(1))
4138 return replaceInstUsesWith(CI, Builder.CreateAndReduce(Arg));
4139 }
4140 [[fallthrough]];
4141 }
4142 case Intrinsic::vector_reduce_umin:
4143 case Intrinsic::vector_reduce_umax: {
4144 if (IID == Intrinsic::vector_reduce_umin ||
4145 IID == Intrinsic::vector_reduce_umax) {
4146 // UMin/UMax reduction over the vector with (potentially-extended)
4147 // i1 element type is actually a (potentially-extended)
4148 // logical `and`/`or` reduction over the original non-extended value:
4149 // vector_reduce_u{min,max}(?ext(<n x i1>))
4150 // -->
4151 // ?ext(vector_reduce_{and,or}(<n x i1>))
4152 Value *Arg = II->getArgOperand(0);
4153 Value *Vect;
4154
4155 if (Value *NewOp =
4156 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4157 replaceUse(II->getOperandUse(0), NewOp);
4158 return II;
4159 }
4160
4161 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4162 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4163 if (VTy->getElementType() == Builder.getInt1Ty()) {
4164 Value *Res = IID == Intrinsic::vector_reduce_umin
4165 ? Builder.CreateAndReduce(Vect)
4166 : Builder.CreateOrReduce(Vect);
4167 if (Arg != Vect)
4168 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4169 II->getType());
4170 return replaceInstUsesWith(CI, Res);
4171 }
4172 }
4173 }
4174 [[fallthrough]];
4175 }
4176 case Intrinsic::vector_reduce_smin:
4177 case Intrinsic::vector_reduce_smax: {
4178 if (IID == Intrinsic::vector_reduce_smin ||
4179 IID == Intrinsic::vector_reduce_smax) {
4180 // SMin/SMax reduction over the vector with (potentially-extended)
4181 // i1 element type is actually a (potentially-extended)
4182 // logical `and`/`or` reduction over the original non-extended value:
4183 // vector_reduce_s{min,max}(<n x i1>)
4184 // -->
4185 // vector_reduce_{or,and}(<n x i1>)
4186 // and
4187 // vector_reduce_s{min,max}(sext(<n x i1>))
4188 // -->
4189 // sext(vector_reduce_{or,and}(<n x i1>))
4190 // and
4191 // vector_reduce_s{min,max}(zext(<n x i1>))
4192 // -->
4193 // zext(vector_reduce_{and,or}(<n x i1>))
4194 Value *Arg = II->getArgOperand(0);
4195 Value *Vect;
4196
4197 if (Value *NewOp =
4198 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4199 replaceUse(II->getOperandUse(0), NewOp);
4200 return II;
4201 }
4202
4203 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4204 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4205 if (VTy->getElementType() == Builder.getInt1Ty()) {
4206 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
4207 if (Arg != Vect)
4208 ExtOpc = cast<CastInst>(Arg)->getOpcode();
4209 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
4210 (ExtOpc == Instruction::CastOps::ZExt))
4211 ? Builder.CreateAndReduce(Vect)
4212 : Builder.CreateOrReduce(Vect);
4213 if (Arg != Vect)
4214 Res = Builder.CreateCast(ExtOpc, Res, II->getType());
4215 return replaceInstUsesWith(CI, Res);
4216 }
4217 }
4218 }
4219 [[fallthrough]];
4220 }
4221 case Intrinsic::vector_reduce_fmax:
4222 case Intrinsic::vector_reduce_fmin:
4223 case Intrinsic::vector_reduce_fadd:
4224 case Intrinsic::vector_reduce_fmul: {
4225 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
4226 IID != Intrinsic::vector_reduce_fmul) ||
4227 II->hasAllowReassoc();
4228 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
4229 IID == Intrinsic::vector_reduce_fmul)
4230 ? 1
4231 : 0;
4232 Value *Arg = II->getArgOperand(ArgIdx);
4233 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
4234 replaceUse(II->getOperandUse(ArgIdx), NewOp);
4235 return nullptr;
4236 }
4237 break;
4238 }
4239 case Intrinsic::is_fpclass: {
4240 if (Instruction *I = foldIntrinsicIsFPClass(*II))
4241 return I;
4242 break;
4243 }
4244 case Intrinsic::threadlocal_address: {
4245 Align MinAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
4246 MaybeAlign Align = II->getRetAlign();
4247 if (MinAlign > Align.valueOrOne()) {
4248 II->addRetAttr(Attribute::getWithAlignment(II->getContext(), MinAlign));
4249 return II;
4250 }
4251 break;
4252 }
4253 case Intrinsic::fptoui_sat:
4254 case Intrinsic::fptosi_sat:
4255 if (Instruction *I = foldItoFPtoI(*II))
4256 return I;
4257 break;
4258 case Intrinsic::frexp: {
4259 // frexp(frexp(x).fract) -> { frexp(x).fract, 0 }: the fraction operand is
4260 // already normalized, so the first result is idempotent and the second is
4261 // zero.
4262 if (match(II->getArgOperand(0),
4264 Value *Res = Builder.CreateInsertValue(PoisonValue::get(II->getType()),
4265 II->getArgOperand(0), 0);
4266 Res = Builder.CreateInsertValue(
4267 Res, Constant::getNullValue(II->getType()->getStructElementType(1)),
4268 1);
4269 return replaceInstUsesWith(*II, Res);
4270 }
4271 break;
4272 }
4273 case Intrinsic::get_active_lane_mask: {
4274 const APInt *Op0, *Op1;
4275 if (match(II->getOperand(0), m_StrictlyPositive(Op0)) &&
4276 match(II->getOperand(1), m_APInt(Op1))) {
4277 Type *OpTy = II->getOperand(0)->getType();
4278 return replaceInstUsesWith(
4279 *II, Builder.CreateIntrinsic(
4280 II->getType(), Intrinsic::get_active_lane_mask,
4281 {Constant::getNullValue(OpTy),
4282 ConstantInt::get(OpTy, Op1->usub_sat(*Op0))}));
4283 }
4284 break;
4285 }
4286 case Intrinsic::experimental_get_vector_length: {
4287 // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
4288 unsigned BitWidth =
4289 std::max(II->getArgOperand(0)->getType()->getScalarSizeInBits(),
4290 II->getType()->getScalarSizeInBits());
4291 ConstantRange Cnt =
4292 computeConstantRangeIncludingKnownBits(II->getArgOperand(0), false,
4293 SQ.getWithInstruction(II))
4295 ConstantRange MaxLanes = cast<ConstantInt>(II->getArgOperand(1))
4296 ->getValue()
4297 .zextOrTrunc(Cnt.getBitWidth());
4298 if (cast<ConstantInt>(II->getArgOperand(2))->isOne())
4299 MaxLanes = MaxLanes.multiply(
4300 getVScaleRange(II->getFunction(), Cnt.getBitWidth()));
4301
4302 if (Cnt.icmp(CmpInst::ICMP_ULE, MaxLanes))
4303 return replaceInstUsesWith(
4304 *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
4305 return nullptr;
4306 }
4307 default: {
4308 // Handle target specific intrinsics
4309 std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
4310 if (V)
4311 return *V;
4312 break;
4313 }
4314 }
4315
4316 // Try to fold intrinsic into select/phi operands. This is legal if:
4317 // * The intrinsic is speculatable.
4318 // * The operand is one of the following:
4319 // - a phi.
4320 // - a select with a scalar condition.
4321 // - a select with a vector condition and II is not a cross lane operation.
4323 for (Value *Op : II->args()) {
4324 if (auto *Sel = dyn_cast<SelectInst>(Op)) {
4325 bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4326 if (IsVectorCond &&
4327 (!isNotCrossLaneOperation(II) || !II->getType()->isVectorTy()))
4328 continue;
4329 // Don't replace a scalar select with a more expensive vector select if
4330 // we can't simplify both arms of the select.
4331 bool SimplifyBothArms =
4332 !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4334 *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
4335 return R;
4336 }
4337 if (auto *Phi = dyn_cast<PHINode>(Op))
4338 if (Instruction *R = foldOpIntoPhi(*II, Phi))
4339 return R;
4340 }
4341 }
4342
4344 return Shuf;
4345
4347 return replaceInstUsesWith(*II, Reverse);
4348
4350 return replaceInstUsesWith(*II, Res);
4351
4352 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4353 // context, so it is handled in visitCallBase and we should trigger it.
4354 return visitCallBase(*II);
4355}
4356
4357// Fence instruction simplification
// Erases the fence FI when an adjacent fence already makes it redundant, and
// returns nullptr when FI has to stay.
// NOTE(review): the signature line (listing line 4358) is missing from this
// listing; FI is presumably the FenceInst being visited -- confirm against the
// upstream file.
// NFI is non-null only when the instruction right after FI is itself a fence.
4359 auto *NFI = dyn_cast<FenceInst>(FI.getNextNode());
4360 // This check is solely here to handle arbitrary target-dependent syncscopes.
4361 // TODO: Can remove if does not matter in practice.
4362 if (NFI && FI.isIdenticalTo(NFI))
4363 return eraseInstFromFunction(FI);
4364
4365 // Returns true if FI1 is an identical or stronger fence than FI2.
// The two fences must share a sync scope, and that scope must be either
// SyncScope::System or SyncScope::SingleThread; every other scope yields false
// (those are only handled by the isIdenticalTo check above).
4366 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4367 auto FI1SyncScope = FI1->getSyncScopeID();
4368 // Consider same scope, where scope is global or single-thread.
4369 if (FI1SyncScope != FI2->getSyncScopeID() ||
4370 (FI1SyncScope != SyncScope::System &&
4371 FI1SyncScope != SyncScope::SingleThread))
4372 return false;
4373
4374 return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
4375 };
// The following fence is at least as strong as FI, so FI can be erased.
4376 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4377 return eraseInstFromFunction(FI);
4378
// Same test against the preceding instruction. dyn_cast_or_null is used here,
// so a null previous node is tolerated (presumably FI may be the first
// instruction of its block).
4379 if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNode()))
4380 if (isIdenticalOrStrongerFence(PFI, &FI))
4381 return eraseInstFromFunction(FI);
4382 return nullptr;
4383}
4384
4385// InvokeInst simplification
// All work is delegated to visitCallBase; nothing invoke-specific is done here.
// NOTE(review): the signature line (listing line 4386) is missing from this
// listing; II is presumably the InvokeInst being visited -- confirm.
4387 return visitCallBase(II);
4388}
4389
4390// CallBrInst simplification
// All work is delegated to visitCallBase; nothing callbr-specific is done here.
// NOTE(review): the signature line (listing line 4391) is missing from this
// listing; CBI is presumably the CallBrInst being visited -- confirm.
4392 return visitCallBase(CBI);
4393}
4394
// Rewrites a call that carries the "modular-format" function attribute into a
// call to the function named by that attribute, and emits one reloc_none
// reference per aspect the call actually needs. Returns the new call, or
// nullptr when nothing is rewritten.
// NOTE(review): the signature line (listing line 4395) is missing from this
// listing. Judging by the call site `optimizeModularFormat(CI, Builder)` and
// the uses below, the parameters are presumably a CallInst *CI and an IR
// builder B -- confirm against the upstream file.
4396 if (!CI->hasFnAttr("modular-format"))
4397 return nullptr;
4398
// NOTE(review): listing line 4399 is missing; the split() result below is
// presumably what initializes `Args` (the comma-separated attribute fields).
4400 llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
4401 // TODO: Make use of the first two arguments
// Field 2 is parsed as a decimal integer. A parse failure is only checked by
// the assert, so release builds do not detect it.
4402 unsigned FirstArgIdx;
4403 [[maybe_unused]] bool Error;
4404 Error = Args[2].getAsInteger(10, FirstArgIdx);
4405 assert(!Error && "invalid first arg index");
// A value of 0 means no rewrite; otherwise the index is decremented before it
// is used as an offset from arg_begin() below.
4406 if (FirstArgIdx == 0)
4407 return nullptr;
4408 --FirstArgIdx;
// Field 3 names the replacement function; field 4 is the prefix used for the
// per-aspect symbol names built in ReferenceAspect.
4409 StringRef FnName = Args[3];
4410 StringRef ImplName = Args[4];
// NOTE(review): listing line 4411 is missing; `AllAspects` is presumably
// derived from the remaining fields of Args -- confirm.
4412
4413 if (AllAspects.empty())
4414 return nullptr;
4415
// Collect the aspects this particular call needs. "float" is needed only if
// some argument from FirstArgIdx onward has a floating-point type.
4416 SmallVector<StringRef> NeededAspects;
4417 for (StringRef Aspect : AllAspects) {
4418 if (Aspect == "float") {
4419 if (llvm::any_of(
4420 llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
4421 CI->arg_end()),
4422 [](Value *V) { return V->getType()->isFloatingPointTy(); }))
4423 NeededAspects.push_back("float");
4424 } else {
4425 // Unknown aspects are always considered to be needed.
4426 NeededAspects.push_back(Aspect);
4427 }
4428 }
4429
// If every aspect is needed, the call is left as it is.
4430 if (NeededAspects.size() == AllAspects.size())
4431 return nullptr;
4432
// Declare (or look up) FnName with the callee's function type and the callee's
// attributes minus "modular-format", then clone the call, retarget the clone,
// drop the attribute from it and insert it through the builder.
4433 Module *M = CI->getModule();
4434 LLVMContext &Ctx = M->getContext();
4435 Function *Callee = CI->getCalledFunction();
4436 FunctionCallee ModularFn = M->getOrInsertFunction(
4437 FnName, Callee->getFunctionType(),
4438 Callee->getAttributes().removeFnAttribute(Ctx, "modular-format"));
4439 CallInst *New = cast<CallInst>(CI->clone());
4440 New->setCalledFunction(ModularFn);
4441 New->removeFnAttr("modular-format");
4442 B.Insert(New);
4443
// Emits a call to the reloc_none intrinsic whose metadata operand is the
// string "<ImplName>_<Aspect>".
4444 const auto ReferenceAspect = [&](StringRef Aspect) {
4445 SmallString<20> Name = ImplName;
4446 Name += '_';
4447 Name += Aspect;
4448 Function *RelocNoneFn =
4449 Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
4450 B.CreateCall(RelocNoneFn,
4451 {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))});
4452 };
4453
// The aspects are sorted before the references are emitted (presumably to keep
// the output order stable).
4454 llvm::sort(NeededAspects);
4455 for (StringRef Request : NeededAspects)
4456 ReferenceAspect(Request);
4457
4458 return New;
4459}
4460
// Tries to simplify the call CI, first with LibCallSimplifier and then with
// optimizeModularFormat. Returns nullptr when neither applies. On success,
// bumps NumSimplified and returns CI itself if it has no uses, otherwise the
// result of replacing CI's uses with the simplified value.
4461Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
// Calls without a known called function are not handled here.
4462 if (!CI->getCalledFunction()) return nullptr;
4463
4464 // Skip optimizing notail and musttail calls so
4465 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4466 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4467 if (CI->isMustTailCall() || CI->isNoTailCall())
4468 return nullptr;
4469
// Callbacks handed to LibCallSimplifier. The first one routes use replacement
// through this combiner's replaceInstUsesWith.
4470 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4471 replaceInstUsesWith(*From, With);
4472 };
// NOTE(review): the body of this lambda (listing line 4474) is missing from
// this listing; it presumably erases I through the combiner -- confirm.
4473 auto InstCombineErase = [this](Instruction *I) {
4475 };
4476 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4477 InstCombineRAUW, InstCombineErase);
4478 if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
4479 ++NumSimplified;
4480 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4481 }
// Second attempt: the "modular-format" rewrite.
4482 if (Value *With = optimizeModularFormat(CI, Builder)) {
4483 ++NumSimplified;
4484 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4485 }
4486
4487 return nullptr;
4488}
4489
// Returns the unique init.trampoline call that initializes TrampMem, provided
// TrampMem is (a cast of) an alloca whose only users are one init.trampoline
// and any number of adjust.trampoline calls; returns nullptr otherwise.
// NOTE(review): the signature line (listing line 4490) is missing from this
// listing; TrampMem is presumably a Value* parameter -- confirm.
4491 // Strip off at most one level of pointer casts, looking for an alloca. This
4492 // is good enough in practice and simpler than handling any number of casts.
// stripPointerCasts() itself is not limited to one cast; the check below gives
// up unless TrampMem is the sole user of the stripped value.
4493 Value *Underlying = TrampMem->stripPointerCasts();
4494 if (Underlying != TrampMem &&
4495 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4496 return nullptr;
4497 if (!isa<AllocaInst>(Underlying))
4498 return nullptr;
4499
4500 IntrinsicInst *InitTrampoline = nullptr;
4501 for (User *U : TrampMem->users()) {
// NOTE(review): listing line 4502 is missing; `II` is presumably U cast to an
// IntrinsicInst, so a non-intrinsic user makes the function give up -- confirm.
4503 if (!II)
4504 return nullptr;
4505 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4506 if (InitTrampoline)
4507 // More than one init_trampoline writes to this value. Give up.
4508 return nullptr;
4509 InitTrampoline = II;
4510 continue;
4511 }
4512 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4513 // Allow any number of calls to adjust.trampoline.
4514 continue;
// Any other intrinsic user of the trampoline memory: give up.
4515 return nullptr;
4516 }
4517
4518 // No call to init.trampoline found.
4519 if (!InitTrampoline)
4520 return nullptr;
4521
4522 // Check that the alloca is being used in the expected way.
// That is, TrampMem must be operand 0 of the init.trampoline call.
4523 if (InitTrampoline->getOperand(0) != TrampMem)
4524 return nullptr;
4525
4526 return InitTrampoline;
4527}
4528
4530 Value *TrampMem) {
// NOTE(review): the first line of this signature (listing line 4529) is absent
// here. The body scans backwards from AdjustTramp within its basic block and
// returns the first init_trampoline whose operand 0 is TrampMem, or nullptr if
// an instruction that may write to memory is met first or the block start is
// reached.
4531 // Visit all the previous instructions in the basic block, and try to find a
4532 // init.trampoline which has a direct path to the adjust.trampoline.
4533 for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4534 E = AdjustTramp->getParent()->begin();
4535 I != E;) {
// Pre-decrement: step to the previous instruction before inspecting it.
4536 Instruction *Inst = &*--I;
// NOTE(review): listing line 4537 (which introduces II) is absent here.
4538 if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4539 II->getOperand(0) == TrampMem)
4540 return II;
// An intervening write could have modified the trampoline memory; give up.
4541 if (Inst->mayWriteToMemory())
4542 return nullptr;
4543 }
4544 return nullptr;
4545}
4546
4547// Given a call to llvm.adjust.trampoline, find and return the corresponding
4548// call to llvm.init.trampoline if the call to the trampoline can be optimized
4549// to a direct call to a function. Otherwise return NULL.
// NOTE(review): the signature (listing line 4550) is absent here; Callee is
// the called operand being inspected.
4551 Callee = Callee->stripPointerCasts();
// Only a callee that is itself an adjust_trampoline intrinsic call qualifies.
4552 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4553 if (!AdjustTramp ||
4554 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4555 return nullptr;
4556
4557 Value *TrampMem = AdjustTramp->getOperand(0);
4558
// Two lookups are tried in order; the first one that yields a result wins.
// NOTE(review): the `if` guarding the first return (listing line 4559) is
// absent here.
4560 return IT;
4561 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4562 return IT;
4563 return nullptr;
4564}
4565
/// Fold a call whose callee is a no-op inttoptr of a ptrauth.sign or
/// ptrauth.resign intrinsic and that carries a "ptrauth" operand bundle.
///
/// When the bundle's key/discriminator match the intrinsic's operands, the
/// call is recreated to target the intrinsic's pointer operand directly: for
/// resign a new "ptrauth" bundle built from the auth key/discriminator is
/// attached, for sign the bundle is dropped.
///
/// \returns the new (not yet inserted) call, or nullptr if the pattern does
/// not match.
4566Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4567 const Value *Callee = Call.getCalledOperand();
4568 const auto *IPC = dyn_cast<IntToPtrInst>(Callee);
4569 if (!IPC || !IPC->isNoopCast(DL))
4570 return nullptr;
4571
4572 const auto *II = dyn_cast<IntrinsicInst>(IPC->getOperand(0));
4573 if (!II)
4574 return nullptr;
4575
4576 Intrinsic::ID IIID = II->getIntrinsicID();
4577 if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4578 return nullptr;
4579
4580 // Isolate the ptrauth bundle from the others.
4581 std::optional<OperandBundleUse> PtrAuthBundleOrNone;
// NOTE(review): the declaration of NewBundles (listing line 4582) is absent
// here; it collects every non-ptrauth bundle for the new call.
4583 for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4584 OperandBundleUse Bundle = Call.getOperandBundleAt(BI);
4585 if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4586 PtrAuthBundleOrNone = Bundle;
4587 else
4588 NewBundles.emplace_back(Bundle);
4589 }
4590
// Without a ptrauth bundle on the call there is nothing to fold.
4591 if (!PtrAuthBundleOrNone)
4592 return nullptr;
4593
4594 Value *NewCallee = nullptr;
4595 switch (IIID) {
4596 // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4597 // assuming the call bundle and the sign operands match.
4598 case Intrinsic::ptrauth_resign: {
4599 // Resign result key should match bundle.
4600 if (II->getOperand(3) != PtrAuthBundleOrNone->Inputs[0])
4601 return nullptr;
4602 // Resign result discriminator should match bundle.
4603 if (II->getOperand(4) != PtrAuthBundleOrNone->Inputs[1])
4604 return nullptr;
4605
4606 // Resign input (auth) key should also match: we can't change the key on
4607 // the new call we're generating, because we don't know what keys are valid.
4608 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4609 return nullptr;
4610
// The new call authenticates with the resign's input key and discriminator
// (operands 1 and 2) and calls the resign's input pointer (operand 0).
4611 Value *NewBundleOps[] = {II->getOperand(1), II->getOperand(2)};
4612 NewBundles.emplace_back("ptrauth", NewBundleOps);
4613 NewCallee = II->getOperand(0);
4614 break;
4615 }
4616
4617 // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4618 // assuming the call bundle and the sign operands match.
4619 // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4620 case Intrinsic::ptrauth_sign: {
4621 // Sign key should match bundle.
4622 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4623 return nullptr;
4624 // Sign discriminator should match bundle.
4625 if (II->getOperand(2) != PtrAuthBundleOrNone->Inputs[1])
4626 return nullptr;
4627 NewCallee = II->getOperand(0);
4628 break;
4629 }
4630 default:
4631 llvm_unreachable("unexpected intrinsic ID");
4632 }
4633
4634 if (!NewCallee)
4635 return nullptr;
4636
// Cast the raw pointer operand back to the original callee type, then clone
// the call with the rebuilt bundle list and the new callee.
4637 NewCallee = Builder.CreateBitOrPointerCast(NewCallee, Callee->getType());
4638 CallBase *NewCall = CallBase::Create(&Call, NewBundles);
4639 NewCall->setCalledOperand(NewCallee);
4640 return NewCall;
4641}
4642
/// Turn a call through a ptrauth constant that wraps a Function into a direct
/// call to that function, provided the call's ptrauth bundle (key and
/// discriminator) is known to be compatible with the constant.
///
/// \returns the new call with the function as called operand, or nullptr if
/// any of the checks below fails.
4643Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
// NOTE(review): the definition of CPA (listing line 4644) is absent here.
4645 if (!CPA)
4646 return nullptr;
4647
4648 auto *CalleeF = dyn_cast<Function>(CPA->getPointer());
4649 // If the ptrauth constant isn't based on a function pointer, bail out.
4650 if (!CalleeF)
4651 return nullptr;
4652
4653 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
// NOTE(review): the definition of PAB (listing line 4654) is absent here.
4655 if (!PAB)
4656 return nullptr;
4657
// Bundle layout used here: Inputs[0] is the key (cast<> asserts it is a
// ConstantInt), Inputs[1] is the discriminator.
4658 auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
4659 Value *Discriminator = PAB->Inputs[1];
4660
4661 // If the bundle doesn't match, this is probably going to fail to auth.
4662 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4663 return nullptr;
4664
4665 // If the bundle matches the constant, proceed in making this a direct call.
// NOTE(review): the creation of NewCall (listing line 4666) is absent here.
4667 NewCall->setCalledOperand(CalleeF);
4668 return NewCall;
4669}
4670
/// Annotate a pointer-returning allocation call with return attributes derived
/// from its known allocation size and alignment.
///
/// \param Call the call site to annotate.
/// \param TLI library info passed to getAllocSize / getAllocAlignment.
/// \returns true if the call's attributes were changed.
4671bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4672 const TargetLibraryInfo *TLI) {
4673 // Note: We only handle cases which can't be driven from generic attributes
4674 // here. So, for example, nonnull and noalias (which are common properties
4675 // of some allocation functions) are expected to be handled via annotation
4676 // of the respective allocator declaration with generic attributes.
4677 bool Changed = false;
4678
4679 if (!Call.getType()->isPointerTy())
4680 return Changed;
4681
// A known, non-zero allocation size selects between the dereferenceable
// (result already nonnull) and dereferenceable_or_null cases below.
4682 std::optional<APInt> Size = getAllocSize(&Call, TLI);
4683 if (Size && *Size != 0) {
4684 // TODO: We really should just emit deref_or_null here and then
4685 // let the generic inference code combine that with nonnull.
4686 if (Call.hasRetAttr(Attribute::NonNull)) {
// Changed only reports whether the attribute kind was previously missing.
// NOTE(review): the head of the attribute-adding call (listing line 4688) is
// absent here.
4687 Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
4689 Call.getContext(), Size->getLimitedValue()));
4690 } else {
// NOTE(review): likewise, listing line 4692 is absent here.
4691 Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
4693 Call.getContext(), Size->getLimitedValue()));
4694 }
4695 }
4696
4697 // Add alignment attribute if alignment is a power of two constant.
4698 Value *Alignment = getAllocAlignment(&Call, TLI);
4699 if (!Alignment)
4700 return Changed;
4701
// Only constant alignments strictly below Value::MaximumAlignment are used.
4702 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
4703 if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
4704 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4705 if (llvm::isPowerOf2_64(AlignmentVal)) {
4706 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4707 Align NewAlign = Align(AlignmentVal);
// Only act when the new alignment is stricter than the existing one.
// NOTE(review): the statements applying NewAlign (listing lines 4709-4710) are
// absent here.
4708 if (NewAlign > ExistingAlign) {
4711 Changed = true;
4712 }
4713 }
4714 }
4715 return Changed;
4716}
4717
4718/// Improvements for call, callbr and invoke instructions.
///
/// Runs a sequence of call-site folds in order: allocation annotations,
/// nonnull argument inference, constant-cast callee folding, convergent and
/// calling-convention cleanups, null/undef callee handling, trampoline and
/// ptrauth folds, library-call simplification, returned-argument forwarding,
/// stale callee_type/kcfi cleanup, removable allocations and gc.statepoint
/// canonicalization.
///
/// \returns a replacement instruction when one of the folds produced one,
/// &Call when the call was only modified in place, and nullptr otherwise
/// (including paths where the call was already replaced or erased).
4719Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4720 bool Changed = annotateAnyAllocSite(Call, &TLI);
4721
4722 // Mark any parameters that are known to be non-null with the nonnull
4723 // attribute. This is helpful for inlining calls to functions with null
4724 // checks on their arguments.
4725 SmallVector<unsigned, 4> ArgNos;
4726 unsigned ArgNo = 0;
4727
4728 for (Value *V : Call.args()) {
4729 if (V->getType()->isPointerTy()) {
4730 // Simplify the nonnull operand if the parameter is known to be nonnull.
4731 // Otherwise, try to infer nonnull for it.
4732 bool HasDereferenceable = Call.getParamDereferenceableBytes(ArgNo) > 0;
// NOTE(review): listing line 4735 (part of this condition) is absent here.
4733 if (Call.paramHasAttr(ArgNo, Attribute::NonNull) ||
4734 (HasDereferenceable &&
4736 V->getType()->getPointerAddressSpace()))) {
4737 if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4738 replaceOperand(Call, ArgNo, Res);
4739 Changed = true;
4740 }
4741 } else if (isKnownNonZero(V,
4742 getSimplifyQuery().getWithInstruction(&Call))) {
// Remember the index; all inferred nonnull attributes are added in one batch
// after the loop.
4743 ArgNos.push_back(ArgNo);
4744 }
4745 }
4746 ArgNo++;
4747 }
4748
4749 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4750
4751 if (!ArgNos.empty()) {
4752 AttributeList AS = Call.getAttributes();
4753 LLVMContext &Ctx = Call.getContext();
4754 AS = AS.addParamAttribute(Ctx, ArgNos,
4755 Attribute::get(Ctx, Attribute::NonNull));
4756 Call.setAttributes(AS);
4757 Changed = true;
4758 }
4759
4760 // If the callee is a pointer to a function, attempt to move any casts to the
4761 // arguments of the call/callbr/invoke.
// NOTE(review): the definition of Callee (listing line 4762) is absent here.
// When transformConstExprCastCall succeeds it has already replaced and erased
// the call, hence nullptr is returned.
4763 Function *CalleeF = dyn_cast<Function>(Callee);
4764 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
4765 transformConstExprCastCall(Call))
4766 return nullptr;
4767
4768 if (CalleeF) {
4769 // Remove the convergent attr on calls when the callee is not convergent.
4770 if (Call.isConvergent() && !CalleeF->isConvergent() &&
4771 !CalleeF->isIntrinsic()) {
4772 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4773 << "\n");
// NOTE(review): the statement clearing the attribute (listing line 4774) is
// absent here.
4775 return &Call;
4776 }
4777
4778 // If the call and callee calling conventions don't match, and neither one
4779 // of the calling conventions is compatible with C calling convention
4780 // this call must be unreachable, as the call is undefined.
// NOTE(review): listing lines 4783-4785 (rest of this condition) are absent.
4781 if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4782 !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
4786 // Only do this for calls to a function with a body. A prototype may
4787 // not actually end up matching the implementation's calling conv for a
4788 // variety of reasons (e.g. it may be written in assembly).
4789 !CalleeF->isDeclaration()) {
4790 Instruction *OldCall = &Call;
4792 // If OldCall does not return void then replaceInstUsesWith poison.
4793 // This allows ValueHandlers and custom metadata to adjust itself.
4794 if (!OldCall->getType()->isVoidTy())
4795 replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
4796 if (isa<CallInst>(OldCall))
4797 return eraseInstFromFunction(*OldCall);
4798
4799 // We cannot remove an invoke or a callbr, because it would change the
4800 // CFG, just change the callee to a null pointer.
4801 cast<CallBase>(OldCall)->setCalledFunction(
4802 CalleeF->getFunctionType(),
4803 Constant::getNullValue(CalleeF->getType()));
4804 return nullptr;
4805 }
4806 }
4807
4808 // Calling a null function pointer is undefined if a null address isn't
4809 // dereferenceable.
// NOTE(review): listing line 4811 (second half of the null-callee test) is
// absent here.
4810 if ((isa<ConstantPointerNull>(Callee) &&
4812 isa<UndefValue>(Callee)) {
4813 // If Call does not return void then replaceInstUsesWith poison.
4814 // This allows ValueHandlers and custom metadata to adjust itself.
// NOTE(review): the replacement statement (listing line 4816) is absent here.
4815 if (!Call.getType()->isVoidTy())
4817
4818 if (Call.isTerminator()) {
4819 // Can't remove an invoke or callbr because we cannot change the CFG.
4820 return nullptr;
4821 }
4822
4823 // This instruction is not reachable, just remove it.
// NOTE(review): the removal statements (listing lines 4824-4825) are absent.
4826 }
4827
4828 if (IntrinsicInst *II = findInitTrampoline(Callee))
4829 return transformCallThroughTrampoline(Call, *II);
4830
4831 // Combine calls involving pointer authentication intrinsics.
4832 if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
4833 return NewCall;
4834
4835 // Combine calls to ptrauth constants.
4836 if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
4837 return NewCall;
4838
4839 if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
4840 InlineAsm *IA = cast<InlineAsm>(Callee);
4841 if (!IA->canThrow()) {
4842 // Normal inline asm calls cannot throw - mark them
4843 // 'nounwind'.
// NOTE(review): the statement adding the attribute (listing line 4844) is
// absent here.
4845 Changed = true;
4846 }
4847 }
4848
4849 // Try to optimize the call if possible, we require DataLayout for most of
4850 // this. None of these calls are seen as possibly dead so go ahead and
4851 // delete the instruction now.
4852 if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
4853 Instruction *I = tryOptimizeCall(CI);
4854 // If we changed something return the result, etc. Otherwise let
4855 // the fallthrough check.
4856 if (I) return eraseInstFromFunction(*I);
4857 }
4858
// If the call returns one of its own arguments, forward that argument (cast to
// the call's type when a lossless bitcast exists) to the call's users.
4859 if (!Call.use_empty() && !Call.isMustTailCall())
4860 if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
4861 Type *CallTy = Call.getType();
4862 Type *RetArgTy = ReturnedArg->getType();
4863 if (RetArgTy->canLosslesslyBitCastTo(CallTy))
4864 return replaceInstUsesWith(
4865 Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
4866 }
4867
4868 // Drop unnecessary callee_type metadata from calls that were converted
4869 // into direct calls.
4870 if (Call.getMetadata(LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
4871 Call.setMetadata(LLVMContext::MD_callee_type, nullptr);
4872 Changed = true;
4873 }
4874
4875 // Drop unnecessary kcfi operand bundles from calls that were converted
4876 // into direct calls.
// NOTE(review): the definition of Bundle (listing line 4877) and the statement
// that drops the bundle (listing line 4896) are absent here. The debug-only
// block below just prints a warning when the callee's kcfi_type metadata
// differs from the bundle's expected type.
4878 if (Bundle && !Call.isIndirectCall()) {
4879 DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
4880 if (CalleeF) {
4881 ConstantInt *FunctionType = nullptr;
4882 ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
4883
4884 if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
4885 FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
4886
4887 if (FunctionType &&
4888 FunctionType->getZExtValue() != ExpectedType->getZExtValue())
4889 dbgs() << Call.getModule()->getName()
4890 << ": warning: kcfi: " << Call.getCaller()->getName()
4891 << ": call to " << CalleeF->getName()
4892 << " using a mismatching function pointer type\n";
4893 }
4894 });
4895
4897 }
4898
4899 if (isRemovableAlloc(&Call, &TLI))
4900 return visitAllocSite(Call);
4901
4902 // Handle intrinsics which can be used in both call and invoke context.
4903 switch (Call.getIntrinsicID()) {
4904 case Intrinsic::experimental_gc_statepoint: {
4905 GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
// Collects every base/derived pointer still referenced by a live gc.relocate;
// used below to shrink the "gc-live" bundle.
4906 SmallPtrSet<Value *, 32> LiveGcValues;
4907 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4908 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4909
4910 // Remove the relocation if unused.
// NOTE(review): the removal statement (listing line 4912) is absent here.
4911 if (GCR.use_empty()) {
4913 continue;
4914 }
4915
4916 Value *DerivedPtr = GCR.getDerivedPtr();
4917 Value *BasePtr = GCR.getBasePtr();
4918
4919 // Undef is undef, even after relocation.
// NOTE(review): the replacement statements (listing lines 4921-4922) are
// absent here.
4920 if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
4923 continue;
4924 }
4925
4926 if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
4927 // The relocation of null will be null for most any collector.
4928 // TODO: provide a hook for this in GCStrategy. There might be some
4929 // weird collector this property does not hold for.
4930 if (isa<ConstantPointerNull>(DerivedPtr)) {
4931 // Use null-pointer of gc_relocate's type to replace it.
// NOTE(review): the replacement statements (listing lines 4932-4933) are
// absent here.
4934 continue;
4935 }
4936
4937 // isKnownNonNull -> nonnull attribute
4938 if (!GCR.hasRetAttr(Attribute::NonNull) &&
4939 isKnownNonZero(DerivedPtr,
4940 getSimplifyQuery().getWithInstruction(&Call))) {
4941 GCR.addRetAttr(Attribute::NonNull);
4942 // We discovered new fact, re-check users.
4943 Worklist.pushUsersToWorkList(GCR);
4944 }
4945 }
4946
4947 // If we have two copies of the same pointer in the statepoint argument
4948 // list, canonicalize to one. This may let us common gc.relocates.
4949 if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
4950 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
// Operand 2 is rewritten to the base pointer's index so both indices agree.
4951 auto *OpIntTy = GCR.getOperand(2)->getType();
4952 GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
4953 }
4954
4955 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
4956 // Canonicalize on the type from the uses to the defs
4957
4958 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
4959 LiveGcValues.insert(BasePtr);
4960 LiveGcValues.insert(DerivedPtr);
4961 }
// NOTE(review): the initializer of this Bundle (listing line 4963) is absent.
4962 std::optional<OperandBundleUse> Bundle =
4964 unsigned NumOfGCLives = LiveGcValues.size();
// Nothing to shrink if there is no bundle or every input is still live.
4965 if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
4966 break;
4967 // We can reduce the size of gc live bundle.
// Val2Idx maps each bundle input to its index in the compacted list; inputs
// that are no longer live get the sentinel value NumOfGCLives.
4968 DenseMap<Value *, unsigned> Val2Idx;
4969 std::vector<Value *> NewLiveGc;
4970 for (Value *V : Bundle->Inputs) {
4971 auto [It, Inserted] = Val2Idx.try_emplace(V);
// Duplicate inputs keep the index assigned at their first occurrence.
4972 if (!Inserted)
4973 continue;
4974 if (LiveGcValues.count(V)) {
4975 It->second = NewLiveGc.size();
4976 NewLiveGc.push_back(V);
4977 } else
4978 It->second = NumOfGCLives;
4979 }
4980 // Update all gc.relocates
4981 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4982 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4983 Value *BasePtr = GCR.getBasePtr();
4984 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
4985 "Missed live gc for base pointer");
4986 auto *OpIntTy1 = GCR.getOperand(1)->getType();
4987 GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
4988 Value *DerivedPtr = GCR.getDerivedPtr();
4989 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
4990 "Missed live gc for derived pointer");
4991 auto *OpIntTy2 = GCR.getOperand(2)->getType();
4992 GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
4993 }
4994 // Create new statepoint instruction.
4995 OperandBundleDef NewBundle("gc-live", std::move(NewLiveGc));
4996 return CallBase::Create(&Call, NewBundle);
4997 }
4998 default: { break; }
4999 }
5000
// No replacement was produced: report whether the call was modified in place.
5001 return Changed ? &Call : nullptr;
5002}
5003
5004/// If the callee is a constexpr cast of a function, attempt to move the cast to
5005/// the arguments of the call/invoke.
5006/// CallBrInst is not supported.
///
/// On success a new direct call/invoke to the function is created with casted
/// arguments, the old instruction's uses are replaced and it is erased.
///
/// \returns true if the call was replaced (Call is erased in that case), false
/// if the transform does not apply.
5007bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
// NOTE(review): the initializer of Callee (listing line 5009) and the head of
// the assertion below (listing line 5013) are absent here.
5008 auto *Callee =
5010 if (!Callee)
5011 return false;
5012
5014 "CallBr's don't have a single point after a def to insert at");
5015
5016 // Don't perform the transform for declarations, which may not be fully
5017 // accurate. For example, void @foo() is commonly used as a placeholder for
5018 // unknown prototypes.
5019 if (Callee->isDeclaration())
5020 return false;
5021
5022 // If this is a call to a thunk function, don't remove the cast. Thunks are
5023 // used to transparently forward all incoming parameters and outgoing return
5024 // values, so it's important to leave the cast in place.
5025 if (Callee->hasFnAttribute("thunk"))
5026 return false;
5027
5028 // If this is a call to a naked function, the assembly might be
5029 // using an argument, or otherwise rely on the frame layout,
5030 // the function prototype will mismatch.
5031 if (Callee->hasFnAttribute(Attribute::Naked))
5032 return false;
5033
5034 // If this is a musttail call, the callee's prototype must match the caller's
5035 // prototype with the exception of pointee types. The code below doesn't
5036 // implement that, so we can't do this transform.
5037 // TODO: Do the transform if it only requires adding pointer casts.
5038 if (Call.isMustTailCall())
5039 return false;
5040
// NOTE(review): the definition of Caller (listing line 5041) is absent here;
// below it is used as the call instruction being replaced.
5042 const AttributeList &CallerPAL = Call.getAttributes();
5043
5044 // Okay, this is a cast from a function to a different type. Unless doing so
5045 // would cause a type conversion of one of our arguments, change this call to
5046 // be a direct call with arguments casted to the appropriate types.
5047 FunctionType *FT = Callee->getFunctionType();
5048 Type *OldRetTy = Caller->getType();
5049 Type *NewRetTy = FT->getReturnType();
5050
5051 // Check to see if we are changing the return type...
5052 if (OldRetTy != NewRetTy) {
5053
5054 if (NewRetTy->isStructTy())
5055 return false; // TODO: Handle multiple return values.
5056
// A non-castable return type is acceptable only when the result is unused.
5057 if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
5058 if (!Caller->use_empty())
5059 return false; // Cannot transform this return value.
5060 }
5061
5062 if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
5063 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5064 if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
5065 NewRetTy, CallerPAL.getRetAttrs())))
5066 return false; // Attribute not compatible with transformed value.
5067 }
5068
5069 // If the callbase is an invoke instruction, and the return value is
5070 // used by a PHI node in a successor, we cannot change the return type of
5071 // the call because there is no place to put the cast instruction (without
5072 // breaking the critical edge). Bail out in this case.
5073 if (!Caller->use_empty()) {
5074 BasicBlock *PhisNotSupportedBlock = nullptr;
5075 if (auto *II = dyn_cast<InvokeInst>(Caller))
5076 PhisNotSupportedBlock = II->getNormalDest();
5077 if (PhisNotSupportedBlock)
5078 for (User *U : Caller->users())
5079 if (PHINode *PN = dyn_cast<PHINode>(U))
5080 if (PN->getParent() == PhisNotSupportedBlock)
5081 return false;
5082 }
5083 }
5084
// Arguments present both in the call and in the callee's prototype.
5085 unsigned NumActualArgs = Call.arg_size();
5086 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
5087
5088 // Prevent us turning:
5089 // declare void @takes_i32_inalloca(i32* inalloca)
5090 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
5091 //
5092 // into:
5093 // call void @takes_i32_inalloca(i32* null)
5094 //
5095 // Similarly, avoid folding away bitcasts of byval calls.
// NOTE(review): the check right below tests inalloca and preallocated on the
// callee; byval mismatches are rejected per argument in the loop further down.
5096 if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
5097 Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
5098 return false;
5099
// Legality pass over the common arguments; nothing is modified yet.
5100 auto AI = Call.arg_begin();
5101 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
5102 Type *ParamTy = FT->getParamType(i);
5103 Type *ActTy = (*AI)->getType();
5104
5105 if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
5106 return false; // Cannot transform this parameter value.
5107
5108 // Check if there are any incompatible attributes we cannot drop safely.
5109 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
5110 .overlaps(AttributeFuncs::typeIncompatible(
5111 ParamTy, CallerPAL.getParamAttrs(i),
5112 AttributeFuncs::ASK_UNSAFE_TO_DROP)))
5113 return false; // Attribute not compatible with transformed value.
5114
5115 if (Call.isInAllocaArgument(i) ||
5116 CallerPAL.hasParamAttr(i, Attribute::Preallocated))
5117 return false; // Cannot transform to and from inalloca/preallocated.
5118
5119 if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
5120 return false;
5121
5122 if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
5123 Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
5124 return false; // Cannot transform to or from byval.
5125 }
5126
5127 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
5128 !CallerPAL.isEmpty()) {
5129 // In this case we have more arguments than the new function type, but we
5130 // won't be dropping them. Check that these extra arguments have attributes
5131 // that are compatible with being a vararg call argument.
5132 unsigned SRetIdx;
// An sret attribute on an argument that would land in the vararg area blocks
// the transform.
5133 if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
5134 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
5135 return false;
5136 }
5137
5138 // Okay, we decided that this is a safe thing to do: go ahead and start
5139 // inserting cast instructions as necessary.
// NOTE(review): the declaration of ArgAttrs (listing line 5141) is absent here.
5140 SmallVector<Value *, 8> Args;
5142 Args.reserve(NumActualArgs);
5143 ArgAttrs.reserve(NumActualArgs);
5144
5145 // Get any return attributes.
5146 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5147
5148 // If the return value is not being used, the type may not be compatible
5149 // with the existing attributes. Wipe out any problematic attributes.
5150 RAttrs.remove(
5151 AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
5152
5153 LLVMContext &Ctx = Call.getContext();
5154 AI = Call.arg_begin();
5155 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
5156 Type *ParamTy = FT->getParamType(i);
5157
5158 Value *NewArg = *AI;
5159 if ((*AI)->getType() != ParamTy)
5160 NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
5161 Args.push_back(NewArg);
5162
5163 // Add any parameter attributes except the ones incompatible with the new
5164 // type. Note that we made sure all incompatible ones are safe to drop.
5165 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
5166 ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
5167 ArgAttrs.push_back(
5168 CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
5169 }
5170
5171 // If the function takes more arguments than the call was taking, add them
5172 // now.
// Missing arguments are filled with null values of the parameter type and
// carry no attributes.
5173 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
5174 Args.push_back(Constant::getNullValue(FT->getParamType(i)));
5175 ArgAttrs.push_back(AttributeSet());
5176 }
5177
5178 // If we are removing arguments to the function, emit an obnoxious warning.
// NOTE(review): no warning is emitted in the visible code. Extra arguments are
// forwarded only for vararg callees; otherwise they are simply not added to
// Args.
5179 if (FT->getNumParams() < NumActualArgs) {
5180 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
5181 if (FT->isVarArg()) {
5182 // Add all of the arguments in their promoted form to the arg list.
5183 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
5184 Type *PTy = getPromotedType((*AI)->getType());
5185 Value *NewArg = *AI;
5186 if (PTy != (*AI)->getType()) {
5187 // Must promote to pass through va_arg area!
5188 Instruction::CastOps opcode =
5189 CastInst::getCastOpcode(*AI, false, PTy, false);
5190 NewArg = Builder.CreateCast(opcode, *AI, PTy);
5191 }
5192 Args.push_back(NewArg);
5193
5194 // Add any parameter attributes.
5195 ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
5196 }
5197 }
5198 }
5199
5200 AttributeSet FnAttrs = CallerPAL.getFnAttrs();
5201
5202 if (NewRetTy->isVoidTy())
5203 Caller->setName(""); // Void type should not have a name.
5204
5205 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
5206 "missing argument attributes");
5207 AttributeList NewCallerPAL = AttributeList::get(
5208 Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
5209
// NOTE(review): the declaration of OpBundles (listing line 5210) is absent.
5211 Call.getOperandBundlesAsDefs(OpBundles);
5212
// Recreate the instruction as a direct call/invoke, keeping operand bundles
// and, for plain calls, the tail-call kind.
5213 CallBase *NewCall;
5214 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
5215 NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
5216 II->getUnwindDest(), Args, OpBundles);
5217 } else {
5218 NewCall = Builder.CreateCall(Callee, Args, OpBundles);
5219 cast<CallInst>(NewCall)->setTailCallKind(
5220 cast<CallInst>(Caller)->getTailCallKind());
5221 }
// NOTE(review): listing line 5223 is absent between the next two statements.
5222 NewCall->takeName(Caller);
5224 NewCall->setAttributes(NewCallerPAL);
5225
5226 // Preserve prof metadata if any.
5227 NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
5228
5229 // Insert a cast of the return type as necessary.
5230 Instruction *NC = NewCall;
5231 Value *NV = NC;
5232 if (OldRetTy != NV->getType() && !Caller->use_empty()) {
5233 assert(!NV->getType()->isVoidTy());
// NOTE(review): the statement creating the cast (listing line 5234) is absent
// here.
5235 NC->setDebugLoc(Caller->getDebugLoc());
5236
5237 auto OptInsertPt = NewCall->getInsertionPointAfterDef();
5238 assert(OptInsertPt && "No place to insert cast");
5239 InsertNewInstBefore(NC, *OptInsertPt);
5240 Worklist.pushUsersToWorkList(*Caller);
5241 }
5242
5243 if (!Caller->use_empty())
5244 replaceInstUsesWith(*Caller, NV);
5245 else if (Caller->hasValueHandle()) {
// NOTE(review): the statements for both branches (listing lines 5247 and 5251)
// are absent here.
5246 if (OldRetTy == NV->getType())
5248 else
5249 // We cannot call ValueIsRAUWd with a different type, and the
5250 // actual tracked value will disappear.
5252 }
5253
5254 eraseInstFromFunction(*Caller);
5255 return true;
5256}
5257
5258/// Turn a call to a function created by init_trampoline / adjust_trampoline
5259/// intrinsic pair into a direct call to the underlying function.
///
/// \param Call the call/invoke/callbr made through the trampoline.
/// \param Tramp the matching init_trampoline intrinsic call.
/// \returns nullptr if the call already has a 'nest' attribute; a newly
/// created (not yet inserted) call with the chain argument spliced in when the
/// target has a 'nest' parameter; otherwise &Call after retargeting it in
/// place.
// NOTE(review): the return-type line of this definition (listing line 5260) is
// absent here.
5261InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
5262 IntrinsicInst &Tramp) {
5263 FunctionType *FTy = Call.getFunctionType();
5264 AttributeList Attrs = Call.getAttributes();
5265
5266 // If the call already has the 'nest' attribute somewhere then give up -
5267 // otherwise 'nest' would occur twice after splicing in the chain.
5268 if (Attrs.hasAttrSomewhere(Attribute::Nest))
5269 return nullptr;
5270
// NOTE(review): the definition of NestF (listing line 5271) is absent here;
// below it is used as the function the trampoline forwards to.
5272 FunctionType *NestFTy = NestF->getFunctionType();
5273
5274 AttributeList NestAttrs = NestF->getAttributes();
5275 if (!NestAttrs.isEmpty()) {
5276 unsigned NestArgNo = 0;
5277 Type *NestTy = nullptr;
5278 AttributeSet NestAttr;
5279
5280 // Look for a parameter marked with the 'nest' attribute.
5281 for (FunctionType::param_iterator I = NestFTy->param_begin(),
5282 E = NestFTy->param_end();
5283 I != E; ++NestArgNo, ++I) {
5284 AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
5285 if (AS.hasAttribute(Attribute::Nest)) {
5286 // Record the parameter type and any other attributes.
5287 NestTy = *I;
5288 NestAttr = AS;
5289 break;
5290 }
5291 }
5292
// NestTy stays null when no parameter carries 'nest'; NestArgNo is then not
// used.
5293 if (NestTy) {
5294 std::vector<Value*> NewArgs;
5295 std::vector<AttributeSet> NewArgAttrs;
5296 NewArgs.reserve(Call.arg_size() + 1);
5297 NewArgAttrs.reserve(Call.arg_size());
5298
5299 // Insert the nest argument into the call argument list, which may
5300 // mean appending it. Likewise for attributes.
5301
5302 {
5303 unsigned ArgNo = 0;
5304 auto I = Call.arg_begin(), E = Call.arg_end();
// The nest check runs before the end-of-range check so that the chain argument
// is still appended when NestArgNo equals the number of call arguments.
5305 do {
5306 if (ArgNo == NestArgNo) {
5307 // Add the chain argument and attributes.
// The chain value is argument operand 2 of the init_trampoline call.
5308 Value *NestVal = Tramp.getArgOperand(2);
5309 if (NestVal->getType() != NestTy)
5310 NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
5311 NewArgs.push_back(NestVal);
5312 NewArgAttrs.push_back(NestAttr);
5313 }
5314
5315 if (I == E)
5316 break;
5317
5318 // Add the original argument and attributes.
5319 NewArgs.push_back(*I);
5320 NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
5321
5322 ++ArgNo;
5323 ++I;
5324 } while (true);
5325 }
5326
5327 // The trampoline may have been bitcast to a bogus type (FTy).
5328 // Handle this by synthesizing a new function type, equal to FTy
5329 // with the chain parameter inserted.
5330
5331 std::vector<Type*> NewTypes;
5332 NewTypes.reserve(FTy->getNumParams()+1);
5333
5334 // Insert the chain's type into the list of parameter types, which may
5335 // mean appending it.
5336 {
5337 unsigned ArgNo = 0;
5338 FunctionType::param_iterator I = FTy->param_begin(),
5339 E = FTy->param_end();
5340
// Same insertion scheme as the argument loop above, applied to FTy's
// parameter types.
5341 do {
5342 if (ArgNo == NestArgNo)
5343 // Add the chain's type.
5344 NewTypes.push_back(NestTy);
5345
5346 if (I == E)
5347 break;
5348
5349 // Add the original type.
5350 NewTypes.push_back(*I);
5351
5352 ++ArgNo;
5353 ++I;
5354 } while (true);
5355 }
5356
5357 // Replace the trampoline call with a direct call. Let the generic
5358 // code sort out any function type mismatches.
5359 FunctionType *NewFTy =
5360 FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
5361 AttributeList NewPAL =
5362 AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
5363 Attrs.getRetAttrs(), NewArgAttrs);
5364
// NOTE(review): the declaration of OpBundles (listing line 5365) is absent.
5366 Call.getOperandBundlesAsDefs(OpBundles);
5367
// Create the same kind of instruction as the original (invoke, callbr or
// call), copying calling convention and attributes; plain calls also keep
// their tail-call kind.
5368 Instruction *NewCaller;
5369 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
5370 NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
5371 II->getUnwindDest(), NewArgs, OpBundles);
5372 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
5373 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
5374 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
5375 NewCaller =
5376 CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
5377 CBI->getIndirectDests(), NewArgs, OpBundles);
5378 cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
5379 cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
5380 } else {
5381 NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
5382 cast<CallInst>(NewCaller)->setTailCallKind(
5383 cast<CallInst>(Call).getTailCallKind());
5384 cast<CallInst>(NewCaller)->setCallingConv(
5385 cast<CallInst>(Call).getCallingConv());
5386 cast<CallInst>(NewCaller)->setAttributes(NewPAL);
5387 }
5388 NewCaller->setDebugLoc(Call.getDebugLoc());
5389
5390 return NewCaller;
5391 }
5392 }
5393
5394 // Replace the trampoline call with a direct call. Since there is no 'nest'
5395 // parameter, there is no need to adjust the argument list. Let the generic
5396 // code sort out any function type mismatches.
5397 Call.setCalledFunction(FTy, NestF);
5398 return &Call;
5399}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
@ Scaled
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
BitTracker BT
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
#define Check(C,...)
#define DEBUG_TYPE
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass through a va_arg area.
static Instruction * createOverflowTuple(IntrinsicInst *II, Value *Result, Constant *Overflow)
Creates a result tuple for an overflow intrinsic II with a given Result and a constant Overflow value...
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
static bool removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, std::function< bool(const IntrinsicInst &)> IsStart)
static bool inputDenormalIsDAZ(const Function &F, const Type *Ty)
static Instruction * reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If this min/max has a matching min/max operand with a constant, try to push the constant operand into...
static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID)
Helper to match idempotent binary intrinsics, namely, intrinsics where f(f(x, y), y) == f(x,...
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, const SimplifyQuery &SQ)
Return true if two values Op0 and Op1 are known to have the same sign.
static Value * optimizeModularFormat(CallInst *CI, IRBuilderBase &B)
static Instruction * moveAddAfterMinMax(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0.
static Instruction * simplifyInvariantGroupIntrinsic(IntrinsicInst &II, InstCombinerImpl &IC)
This function transforms launder.invariant.group and strip.invariant.group like: launder(launder(x)) ...
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
static std::optional< bool > getKnownSign(Value *Op, const SimplifyQuery &SQ)
static cl::opt< unsigned > GuardWideningWindow("instcombine-guard-widening-window", cl::init(3), cl::desc("How wide an instruction window to bypass looking for " "another guard"))
static bool hasUndefSource(AnyMemTransferInst *MI)
Recognize a memcpy/memmove from a trivially otherwise unused alloca.
static Instruction * factorizeMinMaxTree(IntrinsicInst *II)
Reduce a sequence of min/max intrinsics with a common operand.
static Instruction * foldClampRangeOfTwo(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If we have a clamp pattern like max (min X, 42), 41 – where the output can only be one of two possibl...
static Value * simplifyReductionOperand(Value *Arg, bool CanReorderLanes)
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
static Value * foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
static std::optional< bool > getKnownSignOrZero(Value *Op, const SimplifyQuery &SQ)
static Value * foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, const DataLayout &DL, InstCombiner::BuilderTy &Builder)
Fold an unsigned minimum of trailing or leading zero bits counts: umin(cttz(CtOp1,...
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "(X ROp Y) LOp Z" is always equal to "(X LOp Z) ROp (Y LOp Z)".
static Value * foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC, IntrinsicInst *II)
Attempt to simplify value-accumulating recurrences of kind: umax.acc = phi i8 [ umax,...
static bool ldexpSaturatingAddIsSafe(Type *FpTy, Type *ExpTy)
static Instruction * foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC)
static Instruction * simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC, bool IsExtension)
Convert tbl/tbx intrinsics to shufflevector if the mask is constant, and at most two source operands ...
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC)
static IntrinsicInst * findInitTrampoline(Value *Callee)
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, const Function &F, Type *Ty)
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static Value * reassociateMinMaxWithConstants(IntrinsicInst *II, IRBuilderBase &Builder, const SimplifyQuery &SQ)
If this min/max has a constant operand and an operand that is a matching min/max with a constant oper...
static CallInst * canonicalizeConstantArg0ToArg1(CallInst &Call)
static Instruction * foldNeonShift(IntrinsicInst *II, InstCombinerImpl &IC)
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
static bool hasNoSignedWrap(BinaryOperator &I)
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
static bool inputDenormalIsIEEE(DenormalMode Mode)
Return true if it's possible to assume IEEE treatment of input denormals in F for Val.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const Function * getCalledFunction(const Value *V)
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file implements the SmallBitVector class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:260
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:273
bool isNegative() const
Definition APFloat.h:1538
void clearSign()
Definition APFloat.h:1357
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1147
bool isZero() const
Definition APFloat.h:1534
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1197
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1207
bool isInfinity() const
Definition APFloat.h:1535
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:2000
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1980
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1987
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition APInt.cpp:2088
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1993
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
Definition APSInt.h:310
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
Definition APSInt.h:302
This class represents any memset intrinsic.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:194
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
static LLVM_ABI Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI bool isSigned() const
Whether the intrinsic is signed or unsigned.
LLVM_ABI Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
static BinaryOperator * CreateFAddFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:271
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNSW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:314
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:329
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:279
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:283
static BinaryOperator * CreateFSubFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:275
static LLVM_ABI BinaryOperator * CreateNSWNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
void setDoesNotThrow()
MaybeAlign getRetAlign() const
Extract the alignment of the return value.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
OperandBundleUse getOperandBundleAt(unsigned Index) const
Return the operand bundle at a specific index.
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
bool hasRetAttr(Attribute::AttrKind Kind) const
Determine whether the return value has the given attribute.
unsigned getNumOperandBundles() const
Return the number of operand bundles associated with this User.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
static LLVM_ABI CallBase * removeOperandBundleAt(CallBase *CB, size_t Offset, InsertPosition InsertPtr=nullptr)
void setNotConvergent()
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
bool doesNotThrow() const
Determine if the call cannot unwind.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
bool isConvergent() const
Determine if the invoke is convergent.
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
Value * getReturnedArgOperand() const
If one of the arguments has the 'returned' attribute, returns its operand value.
static LLVM_ABI CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, InsertPosition InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
static CallBrInst * Create(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef< BasicBlock * > IndirectDests, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This class represents a function call, abstracting a target machine's calling convention.
bool isNoTailCall() const
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
bool isMustTailCall() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:743
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:746
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:744
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:745
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:748
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:751
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:747
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:756
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:934
Predicate getUnorderedPredicate() const
Definition InstrTypes.h:874
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI ConstantPtrAuth * get(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, Constant *AddrDisc, Constant *DeactivationSymbol)
Return a pointer signed with the specified parameters.
This class represents a range of values.
LLVM_ABI ConstantRange zextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const
Does the predicate Pred hold between ranges this and Other?
LLVM_ABI ConstantRange multiply(const ConstantRange &Other, unsigned NoWrapKind=0) const
Return a new range representing the possible values resulting from a multiplication of a value in thi...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
unsigned size() const
Definition DenseMap.h:174
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:221
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:216
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static FMFSource intersect(Value *A, Value *B)
Intersect the FMF from two instructions.
Definition IRBuilder.h:107
This class represents an extension of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
bool allowReassoc() const
Flag queries.
Definition FMF.h:64
An instruction for ordering other memory operations.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this fence instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this fence instruction.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type::subtype_iterator param_iterator
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool isConvergent() const
Determine if the call is convergent.
Definition Function.h:618
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:602
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
Definition Function.h:251
LLVM_ABI Value * getBasePtr() const
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
LLVM_ABI Value * getDerivedPtr() const
unsigned getDerivedPtrIndex() const
The index into the associated statepoint's argument list which contains the pointer whose relocation t...
std::vector< const GCRelocateInst * > getGCRelocates() const
Get list of all gc relocates linked to this statepoint. May contain several relocations for the same b...
Definition Statepoint.h:206
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this GlobalObject.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:337
PointerType * getType() const
Global values are always pointers.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI Value * CreateLaunderInvariantGroup(Value *Ptr)
Create a launder.invariant.group intrinsic call.
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:509
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
LLVM_ABI Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1461
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2130
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2659
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:514
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2494
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2257
LLVM_ABI Value * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *Op, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
LLVM_ABI Value * CreateStripInvariantGroup(Value *Ptr)
Create a strip.invariant.group intrinsic call.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * SimplifyAnyMemSet(AnyMemSetInst *MI)
Instruction * foldItoFPtoI(FPToIntTy &FI)
fpto{s/u}i.sat --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate type has enou...
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitCallBrInst(CallBrInst &CBI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Value * foldReversedIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are reverses, try to pull the reverse after the intrinsic.
Value * tryGetLog2(Value *Op, bool AssumeNonZero)
Instruction * visitFenceInst(FenceInst &FI)
Instruction * foldShuffledIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are unary shuffles with the same mask, try to shuffle after the int...
Instruction * visitInvokeInst(InvokeInst &II)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Instruction * visitVAEndInst(VAEndInst &I)
Instruction * matchBSwapOrBitReverse(Instruction &I, bool MatchBSwaps, bool MatchBitReversals)
Given an initial instruction, check to see if it is the root of a bswap/bitreverse idiom.
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * SimplifyAnyMemTransfer(AnyMemTransferInst *MI)
OverflowResult computeOverflow(Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS, Instruction *CxtI) const
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
DominatorTree & getDominatorTree() const
BlockFrequencyInfo * BFI
TargetLibraryInfo & TLI
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
const DataLayout & DL
DomConditionCache DC
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
LLVM_ABI std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
ProfileSummaryInfo * PSI
OptimizationRemarkEmitter & ORE
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
const SimplifyQuery & getSimplifyQuery() const
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI std::optional< InstListType::iterator > getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Metadata node.
Definition Metadata.h:1069
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1561
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
bool isSigned() const
Whether the intrinsic is signed or unsigned.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
StringRef getName() const
Get a short "name" for the module.
Definition Module.h:271
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition Operator.h:43
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:113
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:107
bool isCommutative() const
Return true if the instruction is commutative.
Definition Operator.h:130
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Represents a saturating add/sub intrinsic.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Class to represent struct types.
static LLVM_ABI bool isCallingConvCCompatible(CallBase *CI)
Returns true if call site / callee has cdecl-compatible calling conventions.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI bool canLosslesslyBitCastTo(Type *Ty) const
Return true if this type could be converted with a lossless BitCast to type 'Ty'.
Definition Type.cpp:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:276
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:148
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:156
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:36
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
This represents the llvm.va_end intrinsic.
static LLVM_ABI void ValueIsDeleted(Value *V)
Definition Value.cpp:1250
static LLVM_ABI void ValueIsRAUWd(Value *Old, Value *New)
Definition Value.cpp:1303
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
static constexpr uint64_t MaximumAlignment
Definition Value.h:798
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:712
bool use_empty() const
Definition Value.h:346
static constexpr unsigned MaxAlignmentExponent
The maximum alignment for instructions.
Definition Value.h:797
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
OverflowingBinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWNeg(const ValTy &V)
Matches a 'Neg' as 'sub nsw 0, V'.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cstfp_pred_ty< is_neg_zero_fp > m_NegZeroFP()
Match a floating-point negative zero.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
specific_fpval m_SpecificFP(double V)
Match a specific floating point value or vector with all elements equal to the value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
auto m_Constant()
Match an arbitrary Constant and ignore it.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_strictlypositive > m_StrictlyPositive()
Match an integer or vector of strictly positive values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match an integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
cst_pred_ty< custom_checkfn< APInt > > m_CheckedInt(function_ref< bool(const APInt &)> CheckFn)
Match an integer or vector where CheckFn(ele) for each element is true.
auto m_MaxOrMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
auto m_c_MaxOrMin(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWSub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Exact_match< T > m_Exact(const T &SubPattern)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
auto m_UnOp()
Match an arbitrary unary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_CopySign(const Opnd0 &Op0, const Opnd1 &Op1)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:204
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
constexpr double e
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
@ NeverOverflows
Never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
LLVM_ABI Value * simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for an FMul, fold the result or return null.
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
BundleAttr getBundleAttrFromOBU(OperandBundleUse OBU)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI AssumeSeparateStorageInfo getAssumeSeparateStorageInfo(OperandBundleUse)
LLVM_ABI Value * getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI)
Gets the alignment argument for an aligned_alloc-like function, using either built-in knowledge based...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1740
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1695
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_NABS
Negated absolute value.
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_READONLY APFloat minimumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimumNumber semantics.
Definition APFloat.h:1726
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1640
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI bool matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I, PHINode *&P, Value *&Init, Value *&OtherOp)
Attempt to match a simple value-accumulating recurrence of the form: llvm.intrinsic....
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1776
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isNotCrossLaneOperation(const Instruction *I)
Return true if the instruction doesn't potentially cross vector lanes.
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
LLVM_ABI Value * simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for the multiplication of a FMA, fold the result or return null.
@ Other
Any other memory.
Definition ModRef.h:68
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
LLVM_ABI Value * simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q)
Given a constrained FP intrinsic call, tries to compute its simplified version.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1676
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI AssumeNonNullInfo getAssumeNonNullInfo(OperandBundleUse)
@ Add
Sum of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI ConstantRange computeConstantRangeIncludingKnownBits(const WithCache< const Value * > &V, bool ForSigned, const SimplifyQuery &SQ)
Combine constant ranges from computeConstantRange() and computeKnownBits().
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Don't use information from its non-constant operands.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
constexpr unsigned BitWidth
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
LLVM_ABI std::optional< APInt > getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref< const Value *(const Value *)> Mapper=[](const Value *V) { return V;})
Return the size of the requested allocation.
LLVM_ABI AssumeAlignInfo getAssumeAlignInfo(OperandBundleUse)
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if any of the elements of this predicate mask are known to be ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const SimplifyQuery &Q, bool IgnoreFree=false)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:265
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1713
LLVM_ABI bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW=false, bool AllowPoison=true)
Return true if the two given values are negations of each other.
LLVM_READONLY APFloat maximumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximumNumber semantics.
Definition APFloat.h:1753
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI AssumeDereferenceableInfo getAssumeDereferenceableInfo(OperandBundleUse)
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI std::optional< bool > computeKnownFPSignBit(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return false if we can prove that the specified FP value's sign bit is 0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define NC
Definition regutils.h:42
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ IEEE
IEEE-754 denormal numbers preserved.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:288
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:303
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
bool isNonZero() const
Returns true if this value is known to be non-zero.
Definition KnownBits.h:109
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:294
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:300
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:81
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
Matching combinators.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
ArrayRef< Use > Inputs
SelectPatternFlavor Flavor
const DataLayout & DL
const Instruction * CxtI
SimplifyQuery getWithInstruction(const Instruction *I) const