LLVM 23.0.0git
InstCombineCalls.cpp
Go to the documentation of this file.
1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/Bitset.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/Analysis/Loads.h"
33#include "llvm/IR/Attributes.h"
34#include "llvm/IR/BasicBlock.h"
36#include "llvm/IR/Constant.h"
37#include "llvm/IR/Constants.h"
38#include "llvm/IR/DataLayout.h"
39#include "llvm/IR/DebugInfo.h"
41#include "llvm/IR/Function.h"
43#include "llvm/IR/InlineAsm.h"
44#include "llvm/IR/InstrTypes.h"
45#include "llvm/IR/Instruction.h"
48#include "llvm/IR/Intrinsics.h"
49#include "llvm/IR/IntrinsicsAArch64.h"
50#include "llvm/IR/IntrinsicsAMDGPU.h"
51#include "llvm/IR/IntrinsicsARM.h"
52#include "llvm/IR/IntrinsicsHexagon.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Statepoint.h"
58#include "llvm/IR/Type.h"
59#include "llvm/IR/User.h"
60#include "llvm/IR/Value.h"
61#include "llvm/IR/ValueHandle.h"
66#include "llvm/Support/Debug.h"
77#include <algorithm>
78#include <cassert>
79#include <cstdint>
80#include <optional>
81#include <utility>
82#include <vector>
83
84#define DEBUG_TYPE "instcombine"
86
87using namespace llvm;
88using namespace PatternMatch;
89
90STATISTIC(NumSimplified, "Number of library calls simplified");
91
93 "instcombine-guard-widening-window",
94 cl::init(3),
95 cl::desc("How wide an instruction window to bypass looking for "
96 "another guard"));
97
98/// Return the specified type promoted as it would be to pass through a va_arg
99/// area.
// Integer types narrower than 32 bits are widened to the 32-bit integer type
// of the same context; every other type is returned unchanged.
// NOTE(review): the signature line of this helper is missing from this
// listing; the body reads a single type argument named `Ty`.
101 if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
102 if (ITy->getBitWidth() < 32)
103 return Type::getInt32Ty(Ty->getContext());
104 }
105 return Ty;
106}
107
108/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
109/// TODO: This should probably be integrated with visitAllocSites, but that
110/// requires a deeper change to allow either unread or unwritten objects.
// NOTE(review): the signature line is missing from this listing; the body only
// needs `MI` to provide getRawSource().
112 auto *Src = MI->getRawSource();
// Walk up through GEPs towards the base pointer. A GEP with more than one use
// disqualifies the source immediately.
113 while (isa<GetElementPtrInst>(Src)) {
114 if (!Src->hasOneUse())
115 return false;
116 Src = cast<Instruction>(Src)->getOperand(0);
117 }
// Only an alloca base that itself has exactly one use qualifies.
118 return isa<AllocaInst>(Src) && Src->hasOneUse();
119}
120
// Body of the routine that simplifies a memory-transfer intrinsic `MI`
// (memcpy/memmove, per the comments below). In order, it:
//   1. raises the recorded destination/source alignment to the known one,
//   2. zeroes the length of copies that are no-ops,
//   3. rewrites constant 1/2/4/8-byte copies into one integer load + store.
// It returns MI whenever it changed the intrinsic and nullptr otherwise.
// NOTE(review): the signature line is missing from this listing.
122 Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
123 MaybeAlign CopyDstAlign = MI->getDestAlign();
124 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
125 MI->setDestAlignment(DstAlign);
126 return MI;
127 }
128
129 Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
130 MaybeAlign CopySrcAlign = MI->getSourceAlign();
131 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
132 MI->setSourceAlignment(SrcAlign);
133 return MI;
134 }
135
136 // If we have a store to a location which is known constant, we can conclude
137 // that the store must be storing the constant value (else the memory
138 // wouldn't be constant), and this must be a noop.
139 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
140 // Set the size of the copy to 0, it will be deleted on the next iteration.
141 MI->setLength((uint64_t)0);
142 return MI;
143 }
144
145 // If the source is provably undef, the memcpy/memmove doesn't do anything
146 // (unless the transfer is volatile).
147 if (hasUndefSource(MI) && !MI->isVolatile()) {
148 // Set the size of the copy to 0, it will be deleted on the next iteration.
149 MI->setLength((uint64_t)0);
150 return MI;
151 }
152
153 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
154 // load/store.
155 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
156 if (!MemOpLength) return nullptr;
157
158 // Source and destination pointer types are always "i8*" for intrinsic. See
159 // if the size is something we can handle with a single primitive load/store.
160 // A single load+store correctly handles overlapping memory in the memmove
161 // case.
162 uint64_t Size = MemOpLength->getLimitedValue();
163 assert(Size && "0-sized memory transferring should be removed already.");
164
// Size & (Size - 1) is non-zero exactly when Size is not a power of two.
165 if (Size > 8 || (Size&(Size-1)))
166 return nullptr; // If not 1/2/4/8 bytes, exit.
167
168 // If it is an atomic and alignment is less than the size then we will
169 // introduce the unaligned memory access which will be later transformed
170 // into libcall in CodeGen. This is not evident performance gain so disable
171 // it now.
// Both MaybeAlign values hold an alignment here: the early returns above fire
// whenever either of them is unset.
172 if (MI->isAtomic())
173 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
174 return nullptr;
175
176 // Use an integer load+store unless we can find something better.
177 IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
178
179 // If the memcpy has metadata describing the members, see if we can get the
180 // TBAA, scope and noalias tags describing our copy.
181 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
182
183 Value *Src = MI->getArgOperand(1);
184 Value *Dest = MI->getArgOperand(0);
185 LoadInst *L = Builder.CreateLoad(IntType, Src);
186 // Alignment from the mem intrinsic will be better, so use it.
187 L->setAlignment(*CopySrcAlign);
188 L->setAAMetadata(AACopyMD);
// Carry the loop-parallel and access-group metadata over to the new load
// (and to the new store further down).
189 MDNode *LoopMemParallelMD =
190 MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
191 if (LoopMemParallelMD)
192 L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
193 MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
194 if (AccessGroupMD)
195 L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
196
197 StoreInst *S = Builder.CreateStore(L, Dest);
198 // Alignment from the mem intrinsic will be better, so use it.
199 S->setAlignment(*CopyDstAlign);
200 S->setAAMetadata(AACopyMD);
201 if (LoopMemParallelMD)
202 S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
203 if (AccessGroupMD)
204 S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
205 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
206
207 if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
208 // non-atomics can be volatile
209 L->setVolatile(MT->isVolatile());
210 S->setVolatile(MT->isVolatile());
211 }
// NOTE(review): one listing line is missing inside the block below; as shown,
// only the load is given Unordered ordering. Presumably the store gets the
// same treatment — confirm against the original file.
212 if (MI->isAtomic()) {
213 // atomics have to be unordered
214 L->setOrdering(AtomicOrdering::Unordered);
216 }
217
218 // Set the size of the copy to 0, it will be deleted on the next iteration.
219 MI->setLength((uint64_t)0);
220 return MI;
221}
222
// Body of the routine that simplifies a memset-style intrinsic `MI`. In
// order, it:
//   1. raises the recorded destination alignment to the known one,
//   2. zeroes the length of memsets that are no-ops (constant destination
//      memory, or an undef fill value),
//   3. turns a constant 1/2/4/8-byte fill with a constant i8 value into a
//      single integer store.
// It returns MI whenever it changed the intrinsic and nullptr otherwise.
// NOTE(review): the signature line is missing from this listing.
224 const Align KnownAlignment =
225 getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
226 MaybeAlign MemSetAlign = MI->getDestAlign();
227 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
228 MI->setDestAlignment(KnownAlignment);
229 return MI;
230 }
231
232 // If we have a store to a location which is known constant, we can conclude
233 // that the store must be storing the constant value (else the memory
234 // wouldn't be constant), and this must be a noop.
235 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
236 // Set the size of the copy to 0, it will be deleted on the next iteration.
237 MI->setLength((uint64_t)0);
238 return MI;
239 }
240
241 // Remove memset with an undef value.
242 // FIXME: This is technically incorrect because it might overwrite a poison
243 // value. Change to PoisonValue once #52930 is resolved.
244 if (isa<UndefValue>(MI->getValue())) {
245 // Set the size of the copy to 0, it will be deleted on the next iteration.
246 MI->setLength((uint64_t)0);
247 return MI;
248 }
249
250 // Extract the length and alignment and fill if they are constant.
251 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
252 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
253 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
254 return nullptr;
255 const uint64_t Len = LenC->getLimitedValue();
256 assert(Len && "0-sized memory setting should be removed already.");
257 const Align Alignment = MI->getDestAlign().valueOrOne();
258
259 // If it is an atomic and alignment is less than the size then we will
260 // introduce the unaligned memory access which will be later transformed
261 // into libcall in CodeGen. This is not evident performance gain so disable
262 // it now.
263 if (MI->isAtomic() && Alignment < Len)
264 return nullptr;
265
266 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
267 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
268 Value *Dest = MI->getDest();
269
270 // Extract the fill value and store.
// The i8 fill value is replicated across all Len * 8 bits of the stored
// integer.
271 Constant *FillVal = ConstantInt::get(
272 MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
273 StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
274 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
// Debug assignment records that referred to the i8 fill constant are
// repointed at the widened constant.
275 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(S)) {
276 if (llvm::is_contained(DbgAssign->location_ops(), FillC))
277 DbgAssign->replaceVariableLocationOp(FillC, FillVal);
278 }
279
280 S->setAlignment(Alignment);
// NOTE(review): the statement controlled by the `if` below is missing from
// this listing (one line dropped). As shown, the setLength call after the
// comment would become its body. Presumably the dropped line sets the atomic
// ordering of S — confirm against the original file.
281 if (MI->isAtomic())
283
284 // Set the size of the copy to 0, it will be deleted on the next iteration.
285 MI->setLength((uint64_t)0);
286 return MI;
287 }
288
289 return nullptr;
290}
291
292// TODO, Obvious Missing Transforms:
293// * Narrow width by halves excluding zero/undef lanes
//
// Simplify a masked-load intrinsic call. As read here, operand 0 is the
// pointer (its parameter alignment, defaulting to 1, is used for any plain
// load), operand 1 is the mask and operand 2 is the value selected for
// masked-off lanes. Returns the replacement value, or nullptr if no
// simplification applies.
294Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
295 Value *LoadPtr = II.getArgOperand(0);
296 const Align Alignment = II.getParamAlign(0).valueOrOne();
297 Value *Mask = II.getArgOperand(1);
298
299 // If the mask is all ones or poison, this is a plain vector load of the 1st
300 // argument.
301 if (match(Mask, m_AllOnesOrPoison())) {
302 LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
303 "unmaskedload");
304 L->copyMetadata(II);
305 return L;
306 }
307
308 // If we can unconditionally load from this address, replace with a
309 // load/select idiom.
// NOTE(review): the remaining arguments of this call and the end of the
// condition (one listing line) are missing here — restore them from the
// original file before building.
310 if (isDereferenceablePointer(LoadPtr, II.getType(),
312 LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
313 "unmaskedload");
314 LI->copyMetadata(II);
// Lanes enabled by the mask take the loaded value; the others take operand 2.
315 return Builder.CreateSelect(II.getArgOperand(1), LI, II.getArgOperand(2));
316 }
317
318 return nullptr;
319}
320
321// TODO, Obvious Missing Transforms:
322// * Single constant active lane -> store
323// * Narrow width by halfs excluding zero/undef lanes
324Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
325 Value *StorePtr = II.getArgOperand(1);
326 Align Alignment = II.getParamAlign(1).valueOrOne();
327 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
328 if (!ConstMask)
329 return nullptr;
330
331 // If the mask is all zeros or poison, this instruction does nothing.
332 if (match(ConstMask, m_ZeroOrPoison()))
334
335 // If the mask is all ones or poison, this is a plain vector store of the 1st
336 // argument.
337 if (match(ConstMask, m_AllOnesOrPoison())) {
338 StoreInst *S =
339 new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
340 S->copyMetadata(II);
341 return S;
342 }
343
344 if (isa<ScalableVectorType>(ConstMask->getType()))
345 return nullptr;
346
347 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
348 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
349 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
350 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
351 PoisonElts))
352 return replaceOperand(II, 0, V);
353
354 return nullptr;
355}
356
357// TODO, Obvious Missing Transforms:
358// * Single constant active lane load -> load
359// * Dereferenceable address & few lanes -> scalarize speculative load/selects
360// * Adjacent vector addresses -> masked.load
361// * Narrow width by halfs excluding zero/undef lanes
362// * Vector incrementing address -> vector masked load
363Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
364 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(1));
365 if (!ConstMask)
366 return nullptr;
367
368 // Vector splat address w/known mask -> scalar load
369 // Fold the gather to load the source vector first lane
370 // because it is reloading the same value each time
371 if (ConstMask->isAllOnesValue())
372 if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
373 auto *VecTy = cast<VectorType>(II.getType());
374 const Align Alignment = II.getParamAlign(0).valueOrOne();
375 LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
376 Alignment, "load.scalar");
377 Value *Shuf =
378 Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
380 }
381
382 return nullptr;
383}
384
385// TODO, Obvious Missing Transforms:
386// * Single constant active lane -> store
387// * Adjacent vector addresses -> masked.store
388// * Narrow store width by halfs excluding zero/undef lanes
389// * Vector incrementing address -> vector masked store
390Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
391 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
392 if (!ConstMask)
393 return nullptr;
394
395 // If the mask is all zeros or poison, a scatter does nothing.
396 if (match(ConstMask, m_ZeroOrPoison()))
398
399 // Vector splat address -> scalar store
400 if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
401 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
402 if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
403 if (maskContainsAllOneOrUndef(ConstMask)) {
404 Align Alignment = II.getParamAlign(1).valueOrOne();
405 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
406 Alignment);
407 S->copyMetadata(II);
408 return S;
409 }
410 }
411 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
412 // lastlane), ptr
413 if (ConstMask->isAllOnesValue()) {
414 Align Alignment = II.getParamAlign(1).valueOrOne();
415 VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
416 ElementCount VF = WideLoadTy->getElementCount();
417 Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
418 Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
419 Value *Extract =
420 Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
421 StoreInst *S =
422 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
423 S->copyMetadata(II);
424 return S;
425 }
426 }
427 if (isa<ScalableVectorType>(ConstMask->getType()))
428 return nullptr;
429
430 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
431 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
432 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
433 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
434 PoisonElts))
435 return replaceOperand(II, 0, V);
436 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
437 PoisonElts))
438 return replaceOperand(II, 1, V);
439
440 return nullptr;
441}
442
443/// This function transforms launder.invariant.group and strip.invariant.group
444/// like:
445/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
446/// launder(strip(%x)) -> launder(%x)
447/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
448/// strip(launder(%x)) -> strip(%x)
449/// This is legal because it preserves the most recent information about
450/// the presence or absence of invariant.group.
// Returns nullptr when the argument has no nested launder/strip call to peel
// off; otherwise returns the newly built instruction.
// NOTE(review): the first line of the signature is missing from this listing;
// the body also uses an intrinsic-call parameter named `II`.
452 InstCombinerImpl &IC) {
453 auto *Arg = II.getArgOperand(0);
454 auto *StrippedArg = Arg->stripPointerCasts();
455 auto *StrippedInvariantGroupsArg = StrippedArg;
// Peel off every nested launder/strip call (and the pointer casts between
// them) to reach the underlying pointer.
456 while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
457 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
458 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
459 break;
460 StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
461 }
462 if (StrippedArg == StrippedInvariantGroupsArg)
463 return nullptr; // No launders/strips to remove.
464
465 Value *Result = nullptr;
466
// Re-create the outermost intrinsic directly on the fully stripped pointer.
// NOTE(review): one listing line is missing in the final `else` below; as
// shown, only the message string remains. Presumably it was the argument of
// an unreachable/assert-style call — confirm against the original file.
467 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
468 Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
469 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
470 Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
471 else
473 "simplifyInvariantGroupIntrinsic only handles launder and strip");
// Stripping casts may have changed the address space; cast back to II's type
// when it differs.
474 if (Result->getType()->getPointerAddressSpace() !=
475 II.getType()->getPointerAddressSpace())
476 Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
477
478 return cast<Instruction>(Result);
479}
480
// Body of the cttz/ctlz folding routine. `II` must be a cttz or ctlz
// intrinsic call (asserted below); operand 0 is the value being counted and
// operand 1 is the zero-is-poison flag, as the comments in this body describe
// it. Returns a replacement or updated instruction, or nullptr when no fold
// applies.
// NOTE(review): the signature line and several interior lines are missing
// from this listing; each visible gap is flagged below.
482 assert((II.getIntrinsicID() == Intrinsic::cttz ||
483 II.getIntrinsicID() == Intrinsic::ctlz) &&
484 "Expected cttz or ctlz intrinsic");
485 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
486 Value *Op0 = II.getArgOperand(0);
487 Value *Op1 = II.getArgOperand(1);
488 Value *X;
489 // ctlz(bitreverse(x)) -> cttz(x)
490 // cttz(bitreverse(x)) -> ctlz(x)
491 if (match(Op0, m_BitReverse(m_Value(X)))) {
492 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
493 Function *F =
494 Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
495 return CallInst::Create(F, {X, II.getArgOperand(1)});
496 }
497
498 if (II.getType()->isIntOrIntVectorTy(1)) {
499 // ctlz/cttz i1 Op0 --> not Op0
500 if (match(Op1, m_Zero()))
501 return BinaryOperator::CreateNot(Op0);
502 // If zero is poison, then the input can be assumed to be "true", so the
503 // instruction simplifies to "false".
504 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
505 return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
506 }
507
508 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
509 if (II.hasOneUse() && match(Op1, m_Zero()) &&
510 match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
511 II.dropUBImplyingAttrsAndMetadata();
512 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
513 }
514
515 Constant *C;
516
517 if (IsTZ) {
518 // cttz(-x) -> cttz(x)
519 if (match(Op0, m_Neg(m_Value(X))))
520 return IC.replaceOperand(II, 0, X);
521
522 // cttz(-x & x) -> cttz(x)
523 if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
524 return IC.replaceOperand(II, 0, X);
525
526 // cttz(sext(x)) -> cttz(zext(x))
527 if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
528 auto *Zext = IC.Builder.CreateZExt(X, II.getType());
529 auto *CttzZext =
530 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
531 return IC.replaceInstUsesWith(II, CttzZext);
532 }
533
534 // Zext doesn't change the number of trailing zeros, so narrow:
535 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
536 if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
537 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
538 IC.Builder.getTrue());
539 auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
540 return IC.replaceInstUsesWith(II, ZextCttz);
541 }
542
543 // cttz(abs(x)) -> cttz(x)
544 // cttz(nabs(x)) -> cttz(x)
// NOTE(review): the line that declares and computes `SPF` (listing line 546)
// is missing here, and so is the condition guarding the second
// replaceOperand below (listing line 550). As shown, that return would be
// unconditional. Restore both from the original file.
545 Value *Y;
547 if (SPF == SPF_ABS || SPF == SPF_NABS)
548 return IC.replaceOperand(II, 0, X);
549
551 return IC.replaceOperand(II, 0, X);
552
553 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
554 if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
555 match(Op1, m_One())) {
556 Value *ConstCttz =
557 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
558 return BinaryOperator::CreateAdd(ConstCttz, X);
559 }
560
561 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
562 if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
563 match(Op1, m_One())) {
564 Value *ConstCttz =
565 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
566 return BinaryOperator::CreateSub(ConstCttz, X);
567 }
568
569 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
570 if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
571 Value *Width =
572 ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
573 return BinaryOperator::CreateSub(Width, X);
574 }
575 } else {
576 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
577 if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
578 match(Op1, m_One())) {
579 Value *ConstCtlz =
580 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
581 return BinaryOperator::CreateAdd(ConstCtlz, X);
582 }
583
584 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
585 if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
586 match(Op1, m_One())) {
587 Value *ConstCtlz =
588 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
589 return BinaryOperator::CreateSub(ConstCtlz, X);
590 }
591
592 // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
// NOTE(review): the pattern argument of the match() below (listing line 595)
// is missing from this listing; restore it from the original file.
593 if (Op0->hasOneUse() &&
594 match(Op0,
596 Type *Ty = II.getType();
597 unsigned BitWidth = Ty->getScalarSizeInBits();
598 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
599 {X, IC.Builder.getFalse()});
600 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
601 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
602 }
603 }
604
605 // cttz(Pow2) -> Log2(Pow2)
606 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
607 if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
608 if (IsTZ)
609 return IC.replaceInstUsesWith(II, R);
610 BinaryOperator *BO = BinaryOperator::CreateSub(
611 ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
612 R);
// NOTE(review): listing line 614 is missing after the next statement;
// presumably another wrap flag is set on BO there — confirm against the
// original file.
613 BO->setHasNoSignedWrap();
615 return BO;
616 }
617
618 KnownBits Known = IC.computeKnownBits(Op0, &II);
619
620 // Create a mask for bits above (ctlz) or below (cttz) the first known one.
621 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
622 : Known.countMaxLeadingZeros();
623 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
624 : Known.countMinLeadingZeros();
625
626 // If all bits above (ctlz) or below (cttz) the first known one are known
627 // zero, this value is constant.
628 // FIXME: This should be in InstSimplify because we're replacing an
629 // instruction with a constant.
630 if (PossibleZeros == DefiniteZeros) {
631 auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
632 return IC.replaceInstUsesWith(II, C);
633 }
634
635 // If the input to cttz/ctlz is known to be non-zero,
636 // then change the 'ZeroIsPoison' parameter to 'true'
637 // because we know the zero behavior can't affect the result.
// NOTE(review): the second half of the condition below (listing line 639) is
// missing from this listing; restore it from the original file.
638 if (!Known.One.isZero() ||
640 if (!match(II.getArgOperand(1), m_One()))
641 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
642 }
643
644 // Add range attribute since known bits can't completely reflect what we know.
// The range is half-open: [DefiniteZeros, PossibleZeros + 1). It is only
// attached when the call has neither a range return attribute nor range
// metadata.
645 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
646 if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
647 !II.getMetadata(LLVMContext::MD_range)) {
648 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
649 APInt(BitWidth, PossibleZeros + 1));
650 II.addRangeRetAttr(Range);
651 return &II;
652 }
653
654 return nullptr;
655}
656
// Body of the ctpop folding routine. `II` must be a ctpop intrinsic call
// (asserted below) and operand 0 is the value whose set bits are counted.
// Returns a replacement or updated instruction, or nullptr when no fold
// applies.
// NOTE(review): the signature line and several interior lines are missing
// from this listing; each visible gap is flagged below.
658 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
659 "Expected ctpop intrinsic");
660 Type *Ty = II.getType();
661 unsigned BitWidth = Ty->getScalarSizeInBits();
662 Value *Op0 = II.getArgOperand(0);
663 Value *X, *Y;
664
665 // ctpop(bitreverse(x)) -> ctpop(x)
666 // ctpop(bswap(x)) -> ctpop(x)
667 if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
668 return IC.replaceOperand(II, 0, X);
669
670 // ctpop(rot(x)) -> ctpop(x)
// A funnel shift is treated as a rotate when both of its data operands are
// the same value.
671 if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
672 match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
673 X == Y)
674 return IC.replaceOperand(II, 0, X);
675
676 // ctpop(x | -x) -> bitwidth - cttz(x, false)
677 if (Op0->hasOneUse() &&
678 match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
679 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
680 {X, IC.Builder.getFalse()});
681 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
682 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
683 }
684
685 // ctpop(~x & (x - 1)) -> cttz(x, false)
// NOTE(review): the pattern argument of the match() below (listing line 687)
// is missing from this listing; restore it from the original file.
686 if (match(Op0,
688 Function *F =
689 Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
690 return CallInst::Create(F, {X, IC.Builder.getFalse()});
691 }
692
693 // Zext doesn't change the number of set bits, so narrow:
694 // ctpop (zext X) --> zext (ctpop X)
695 if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
696 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
697 return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
698 }
699
700 KnownBits Known(BitWidth);
701 IC.computeKnownBits(Op0, Known, &II);
702
703 // If all bits are zero except for exactly one fixed bit, then the result
704 // must be 0 or 1, and we can get that answer by shifting to LSB:
705 // ctpop (X & 32) --> (X & 32) >> 5
706 // TODO: Investigate removing this as its likely unnecessary given the below
707 // `isKnownToBeAPowerOfTwo` check.
708 if ((~Known.Zero).isPowerOf2())
709 return BinaryOperator::CreateLShr(
710 Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
711
712 // More generally we can also handle non-constant power of 2 patterns such as
713 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
714 // ctpop(Pow2OrZero) --> icmp ne X, 0
// NOTE(review): the value operand of the zext below (listing lines 717-718)
// is missing from this listing; per the comment above it is the `icmp ne`
// result. Restore it from the original file.
715 if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
716 return CastInst::Create(Instruction::ZExt,
719 Ty);
720
721 // Add range attribute since known bits can't completely reflect what we know.
722 if (BitWidth != 1) {
723 ConstantRange OldRange =
724 II.getRange().value_or(ConstantRange::getFull(BitWidth));
725
// Upper is exclusive, hence the + 1.
726 unsigned Lower = Known.countMinPopulation();
727 unsigned Upper = Known.countMaxPopulation() + 1;
728
// NOTE(review): the last clause of the condition below (listing line 730) and
// the declaration of `Range` (listing line 733) are missing from this
// listing; restore them from the original file.
729 if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
731 Lower = 1;
732
734 Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
735
// Only report a change when the intersected range differs from the range
// the call already had.
736 if (Range != OldRange) {
737 II.addRangeRetAttr(Range);
738 return &II;
739 }
740 }
741
742 return nullptr;
743}
744
745/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
746/// at most two source operands are actually referenced.
// Returns nullptr whenever the conversion is not possible; otherwise returns
// the result of replacing all uses of `II` with the shuffle.
// NOTE(review): the first line of the signature is missing from this listing;
// the body also uses an intrinsic-call parameter `II` and a combiner `IC`.
748 bool IsExtension) {
749 // Bail out if the mask is not a constant.
750 auto *C = dyn_cast<Constant>(II.getArgOperand(II.arg_size() - 1));
751 if (!C)
752 return nullptr;
753
754 auto *RetTy = cast<FixedVectorType>(II.getType());
755 unsigned NumIndexes = RetTy->getNumElements();
756
757 // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
758 if (!RetTy->getElementType()->isIntegerTy(8) ||
759 (NumIndexes != 8 && NumIndexes != 16))
760 return nullptr;
761
762 // For tbx instructions, the first argument is the "fallback" vector, which
763 // has the same length as the mask and return type.
764 unsigned int StartIndex = (unsigned)IsExtension;
765 auto *SourceTy =
766 cast<FixedVectorType>(II.getArgOperand(StartIndex)->getType());
767 // Note that the element count of each source vector does *not* need to be the
768 // same as the element count of the return type and mask! All source vectors
769 // must have the same element count as each other, though.
770 unsigned NumElementsPerSource = SourceTy->getNumElements();
771
772 // There are no tbl/tbx intrinsics for which the destination size exceeds the
773 // source size. However, our definitions of the intrinsics, at least in
774 // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
775 // *could* technically happen.
776 if (NumIndexes > NumElementsPerSource)
777 return nullptr;
778
779 // The tbl/tbx intrinsics take several source operands followed by a mask
780 // operand.
781 unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;
782
783 // Map input operands to shuffle indices. This also helpfully deduplicates the
784 // input arguments, in case the same value is passed as an argument multiple
785 // times.
786 SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
787 Value *ShuffleOperands[2] = {PoisonValue::get(SourceTy),
788 PoisonValue::get(SourceTy)};
789
// 16 entries suffice: NumIndexes was restricted to 8 or 16 above.
790 int Indexes[16];
791 for (unsigned I = 0; I < NumIndexes; ++I) {
792 Constant *COp = C->getAggregateElement(I);
793
794 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
795 return nullptr;
796
// Undef mask elements are recorded as shuffle index -1.
797 if (isa<UndefValue>(COp)) {
798 Indexes[I] = -1;
799 continue;
800 }
801
802 uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
803 // The index of the input argument that this index references (0 = first
804 // source argument, etc).
805 unsigned SourceOperandIndex = Index / NumElementsPerSource;
806 // The index of the element at that source operand.
807 unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
808
809 Value *SourceOperand;
810 if (SourceOperandIndex >= NumSourceOperands) {
811 // This index is out of bounds. Map it to index into either the fallback
812 // vector (tbx) or vector of zeroes (tbl).
813 SourceOperandIndex = NumSourceOperands;
814 if (IsExtension) {
815 // For out-of-bounds indices in tbx, choose the `I`th element of the
816 // fallback.
817 SourceOperand = II.getArgOperand(0);
818 SourceOperandElementIndex = I;
819 } else {
820 // Otherwise, choose some element from the dummy vector of zeroes (we'll
821 // always choose the first).
822 SourceOperand = Constant::getNullValue(SourceTy);
823 SourceOperandElementIndex = 0;
824 }
825 } else {
826 SourceOperand = II.getArgOperand(SourceOperandIndex + StartIndex);
827 }
828
829 // The source operand may be the fallback vector, which may not have the
830 // same number of elements as the source vector. In that case, we *could*
831 // choose to extend its length with another shufflevector, but it's simpler
832 // to just bail instead.
833 if (cast<FixedVectorType>(SourceOperand->getType())->getNumElements() !=
834 NumElementsPerSource)
835 return nullptr;
836
837 // We now know the source operand referenced by this index. Make it a
838 // shufflevector operand, if it isn't already.
839 unsigned NumSlots = ValueToShuffleSlot.size();
840 // This shuffle references more than two sources, and hence cannot be
841 // represented as a shufflevector.
842 if (NumSlots == 2 && !ValueToShuffleSlot.contains(SourceOperand))
843 return nullptr;
844
845 auto [It, Inserted] =
846 ValueToShuffleSlot.try_emplace(SourceOperand, NumSlots);
847 if (Inserted)
848 ShuffleOperands[It->getSecond()] = SourceOperand;
849
// Elements of the second shuffle operand are numbered after those of the
// first: slot * NumElementsPerSource + element.
850 unsigned RemappedIndex =
851 (It->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
852 Indexes[I] = RemappedIndex;
853 }
854
// NOTE(review): the start of the statement that creates `Shuf` (listing line
// 855) is missing from this listing; only its argument list remains. Restore
// it from the original file.
856 ShuffleOperands[0], ShuffleOperands[1], ArrayRef(Indexes, NumIndexes));
857 return IC.replaceInstUsesWith(II, Shuf);
858}
859
860// Returns true iff the 2 intrinsics have the same operands, limiting the
861// comparison to the first NumOperands.
862static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
863 unsigned NumOperands) {
864 assert(I.arg_size() >= NumOperands && "Not enough operands");
865 assert(E.arg_size() >= NumOperands && "Not enough operands");
866 for (unsigned i = 0; i < NumOperands; i++)
867 if (I.getArgOperand(i) != E.getArgOperand(i))
868 return false;
869 return true;
870}
871
872// Remove trivially empty start/end intrinsic ranges, i.e. a start
873// immediately followed by an end (ignoring debuginfo or other
874// start/end intrinsics in between). As this handles only the most trivial
875// cases, tracking the nesting level is not needed:
876//
877// call @llvm.foo.start(i1 0)
878// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
879// call @llvm.foo.end(i1 0)
880// call @llvm.foo.end(i1 0) ; &I
//
// Returns true when the end intrinsic was erased and false otherwise.
// `IsStart` decides whether a given intrinsic counts as a start marker.
// NOTE(review): the signature line that names this function and declares the
// `EndI` and `IC` parameters (listing line 882) is missing from this listing.
881static bool
883 std::function<bool(const IntrinsicInst &)> IsStart) {
884 // We start from the end intrinsic and scan backwards, so that InstCombine
885 // has already processed (and potentially removed) all the instructions
886 // before the end intrinsic.
887 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
888 for (; BI != BE; ++BI) {
889 if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
// Debug/pseudo instructions and other end markers of the same kind are
// skipped.
890 if (I->isDebugOrPseudoInst() ||
891 I->getIntrinsicID() == EndI.getIntrinsicID())
892 continue;
893 if (IsStart(*I)) {
// NOTE(review): one listing line (895) is missing inside the block below.
// Presumably it erases the matching start intrinsic `*I` as well — confirm
// against the original file.
894 if (haveSameOperands(EndI, *I, EndI.arg_size())) {
896 IC.eraseInstFromFunction(EndI);
897 return true;
898 }
899 // Skip start intrinsics that don't pair with this end intrinsic.
900 continue;
901 }
902 }
// Anything else ends the backward scan.
903 break;
904 }
905
906 return false;
907}
908
// NOTE(review): the line declaring this function (listing number 909) is not
// visible in this text. The visible body tries to remove a trivially empty
// range that ends at I, and always returns nullptr.
910 removeTriviallyEmptyRange(I, *this, [&I](const IntrinsicInst &II) {
911 // Bail out on the case where the source va_list of a va_copy is destroyed
912 // immediately by a follow-up va_end.
 // A candidate "start" is any vastart, or a vacopy whose operand 1 is not
 // the value passed as operand 0 of I.
913 return II.getIntrinsicID() == Intrinsic::vastart ||
914 (II.getIntrinsicID() == Intrinsic::vacopy &&
915 I.getArgOperand(0) != II.getArgOperand(1));
916 });
917 return nullptr;
918}
919
// NOTE(review): the line declaring this function (listing number 920) is not
// visible in this text. The visible body swaps the first two call arguments
// when argument 0 is a Constant and argument 1 is not, returning the modified
// call; otherwise it returns nullptr and leaves the call untouched.
921 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
922 Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
923 if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
924 Call.setArgOperand(0, Arg1);
925 Call.setArgOperand(1, Arg0);
926 return &Call;
927 }
928 return nullptr;
929}
930
931/// Creates a result tuple for an overflow intrinsic \p II with a given
932/// \p Result and a constant \p Overflow value.
///
/// The tuple is built as a constant struct {poison, Overflow} of II's struct
/// type, into which \p Result is then inserted at index 0.
///
/// NOTE(review): listing number 933 (the line with the function name and its
/// first parameters) is not visible in this text.
934 Constant *Overflow) {
935 Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
936 StructType *ST = cast<StructType>(II->getType());
937 Constant *Struct = ConstantStruct::get(ST, V);
938 return InsertValueInst::Create(Struct, Result, 0);
939}
940
// Common folding for with-overflow intrinsics. First asks
// OptimizeOverflowCheck for a simplified result/overflow pair; failing that,
// looks for an assumption stating that the overflow bit is false and, if one
// is valid at II, replaces the intrinsic with the plain binary operation
// (flagged nsw or nuw) plus a constant-false overflow bit.
// Returns the replacement instruction, or nullptr if nothing applied.
//
// NOTE(review): listing number 941 (presumably the return-type line of this
// definition) is not visible in this text.
942InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
943 WithOverflowInst *WO = cast<WithOverflowInst>(II);
944 Value *OperationResult = nullptr;
945 Constant *OverflowResult = nullptr;
946 if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
947 WO->getRHS(), *WO, OperationResult, OverflowResult))
948 return createOverflowTuple(WO, OperationResult, OverflowResult);
949
950 // See whether we can optimize the overflow check with assumption information.
951 for (User *U : WO->users()) {
 // Only users that extract element 1 (the overflow bit) are of interest.
952 if (!match(U, m_ExtractValue<1>(m_Value())))
953 continue;
954
955 for (auto &AssumeVH : AC.assumptionsFor(U)) {
956 if (!AssumeVH)
957 continue;
958 CallInst *I = cast<CallInst>(AssumeVH);
 // The assumption must be on the negation of the overflow bit.
959 if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
960 continue;
961 if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
962 /*AllowEphemerals=*/true))
963 continue;
964 Value *Result =
965 Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
966 Result->takeName(WO);
 // The builder may hand back a non-instruction value; only an
 // Instruction can carry the no-wrap flag.
967 if (auto *Inst = dyn_cast<Instruction>(Result)) {
968 if (WO->isSigned())
969 Inst->setHasNoSignedWrap();
970 else
971 Inst->setHasNoUnsignedWrap();
972 }
973 return createOverflowTuple(WO, Result,
974 ConstantInt::getFalse(U->getType()));
975 }
976 }
977
978 return nullptr;
979}
980
981static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
982 Ty = Ty->getScalarType();
983 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
984}
985
986static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
987 Ty = Ty->getScalarType();
988 return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
989}
990
991/// \returns the compare predicate type if the test performed by
992/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
993/// floating-point environment assumed for \p F for type \p Ty
///
/// Each mask is only accepted under one input-denormal mode: the cases guarded
/// by inputDenormalIsIEEE require IEEE handling of denormal inputs, the cases
/// guarded by inputDenormalIsDAZ require denormal inputs to be treated as
/// zero.
///
/// NOTE(review): listing numbers 994, 1009, 1013, 1021, 1025 and 1049 are not
/// visible in this text. That hides the first line of the signature, four
/// `case` labels (each body below that directly follows a `break;` without a
/// label belongs to one of them) and the final return statement. Restore them
/// from the original file before relying on this listing.
995 const Function &F, Type *Ty) {
996 switch (static_cast<unsigned>(Mask)) {
997 case fcZero:
998 if (inputDenormalIsIEEE(F, Ty))
999 return FCmpInst::FCMP_OEQ;
1000 break;
1001 case fcZero | fcSubnormal:
1002 if (inputDenormalIsDAZ(F, Ty))
1003 return FCmpInst::FCMP_OEQ;
1004 break;
1005 case fcPositive | fcNegZero:
1006 if (inputDenormalIsIEEE(F, Ty))
1007 return FCmpInst::FCMP_OGE;
1008 break;
1010 if (inputDenormalIsDAZ(F, Ty))
1011 return FCmpInst::FCMP_OGE;
1012 break;
1014 if (inputDenormalIsIEEE(F, Ty))
1015 return FCmpInst::FCMP_OGT;
1016 break;
1017 case fcNegative | fcPosZero:
1018 if (inputDenormalIsIEEE(F, Ty))
1019 return FCmpInst::FCMP_OLE;
1020 break;
1022 if (inputDenormalIsDAZ(F, Ty))
1023 return FCmpInst::FCMP_OLE;
1024 break;
1026 if (inputDenormalIsIEEE(F, Ty))
1027 return FCmpInst::FCMP_OLT;
1028 break;
1029 case fcPosNormal | fcPosInf:
1030 if (inputDenormalIsDAZ(F, Ty))
1031 return FCmpInst::FCMP_OGT;
1032 break;
1033 case fcNegNormal | fcNegInf:
1034 if (inputDenormalIsDAZ(F, Ty))
1035 return FCmpInst::FCMP_OLT;
1036 break;
1037 case ~fcZero & ~fcNan:
1038 if (inputDenormalIsIEEE(F, Ty))
1039 return FCmpInst::FCMP_ONE;
1040 break;
1041 case ~(fcZero | fcSubnormal) & ~fcNan:
1042 if (inputDenormalIsDAZ(F, Ty))
1043 return FCmpInst::FCMP_ONE;
1044 break;
1045 default:
1046 break;
1047 }
1048
1050}
1051
// Simplifies an is.fpclass test of Src0 against a constant class mask:
//  - an fneg or fabs on the tested value is folded into the mask;
//  - when the function is not strictfp, several mask shapes (infinities, NaN,
//    not-NaN, comparisons against zero) are rewritten as plain fcmp;
//  - otherwise computeKnownFPClass is used to drop mask bits that cannot be
//    set, or to fold the whole test to true.
// Returns the modified or replacement instruction, or nullptr if nothing
// applied.
//
// NOTE(review): listing numbers 1085, 1118, 1143, 1161 and 1162 are not
// visible in this text. The declarations of `Inf` (for the fabs/inf fold and
// for the one/une fold), of `PredType` and of `Zero`, plus the end of the
// fcmp-with-zero condition, fall in those gaps. Restore them from the original
// file before relying on this listing.
1052Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
1053 Value *Src0 = II.getArgOperand(0);
1054 Value *Src1 = II.getArgOperand(1);
1055 const ConstantInt *CMask = cast<ConstantInt>(Src1);
1056 FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
 // IsUnordered: both NaN bits are set. IsOrdered: neither NaN bit is set.
 // A mask with exactly one NaN bit is neither.
1057 const bool IsUnordered = (Mask & fcNan) == fcNan;
1058 const bool IsOrdered = (Mask & fcNan) == fcNone;
1059 const FPClassTest OrderedMask = Mask & ~fcNan;
1060 const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
1061
1062 const bool IsStrict =
1063 II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
1064
1065 Value *FNegSrc;
1066 if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
1067 // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
1068
1069 II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
1070 return replaceOperand(II, 0, FNegSrc);
1071 }
1072
1073 Value *FAbsSrc;
1074 if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
 // is.fpclass (fabs x), mask -> is.fpclass x, inverse_fabs(mask)
1075 II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
1076 return replaceOperand(II, 0, FAbsSrc);
1077 }
1078
1079 if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
1080 (IsOrdered || IsUnordered) && !IsStrict) {
1081 // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
1082 // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
1083 // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
1084 // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
1086 FCmpInst::Predicate Pred =
1087 IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
1088 if (OrderedInvertedMask == fcInf)
1089 Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
1090
1091 Value *Fabs = Builder.CreateFAbs(Src0);
1092 Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
1093 CmpInf->takeName(&II);
1094 return replaceInstUsesWith(II, CmpInf);
1095 }
1096
1097 if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
1098 (IsOrdered || IsUnordered) && !IsStrict) {
1099 // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1100 // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1101 // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
1102 // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
1103 Constant *Inf =
1104 ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
1105 Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
1106 : Builder.CreateFCmpOEQ(Src0, Inf);
1107
1108 EqInf->takeName(&II);
1109 return replaceInstUsesWith(II, EqInf);
1110 }
1111
1112 if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
1113 (IsOrdered || IsUnordered) && !IsStrict) {
1114 // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
1115 // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
1116 // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
1117 // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
1119 OrderedInvertedMask == fcNegInf);
1120 Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
1121 : Builder.CreateFCmpONE(Src0, Inf);
1122 NeInf->takeName(&II);
1123 return replaceInstUsesWith(II, NeInf);
1124 }
1125
1126 if (Mask == fcNan && !IsStrict) {
1127 // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1128 // exceptions.
1129 Value *IsNan =
1130 Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
1131 IsNan->takeName(&II);
1132 return replaceInstUsesWith(II, IsNan);
1133 }
1134
1135 if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1136 // Equivalent of !isnan. Replace with standard fcmp.
1137 Value *FCmp =
1138 Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
1139 FCmp->takeName(&II);
1140 return replaceInstUsesWith(II, FCmp);
1141 }
1142
1144
1145 // Try to replace with an fcmp with 0
1146 //
1147 // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1148 // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
1149 // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1150 // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1151 //
1152 // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
1153 // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
1154 //
1155 // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
1156 // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
1157 //
1158 if (!IsStrict && (IsOrdered || IsUnordered) &&
1159 (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
1160 Src0->getType())) !=
1163 // Equivalent of == 0.
 // The ordered predicate is switched to its unordered form when the
 // original mask also accepted NaN.
1164 Value *FCmp = Builder.CreateFCmp(
1165 IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
1166 Src0, Zero);
1167
1168 FCmp->takeName(&II);
1169 return replaceInstUsesWith(II, FCmp);
1170 }
1171
1172 KnownFPClass Known =
1173 computeKnownFPClass(Src0, Mask, SQ.getWithInstruction(&II));
1174
1175 // Clear test bits we know must be false from the source value.
1176 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
1177 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
1178 if ((Mask & Known.KnownFPClasses) != Mask) {
1179 II.setArgOperand(
1180 1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
1181 return &II;
1182 }
1183
1184 // If none of the tests which can return false are possible, fold to true.
1185 // fp_class (nnan x), ~(qnan|snan) -> true
1186 // fp_class (ninf x), ~(ninf|pinf) -> true
1187 if (Mask == Known.KnownFPClasses)
1188 return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
1189
1190 return nullptr;
1191}
1192
// Returns false if Op is known non-negative, true if it is known negative
// (both from computeKnownBits), and std::nullopt when the sign is unknown.
//
// NOTE(review): listing number 1202 is not visible in this text. It holds the
// statement controlled by the `if` on the nsw-sub match below; as printed
// here the `if` appears to govern `return std::nullopt;`, which is an
// artifact of the missing line. Restore it from the original file.
1193static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1194 KnownBits Known = computeKnownBits(Op, SQ);
1195 if (Known.isNonNegative())
1196 return false;
1197 if (Known.isNegative())
1198 return true;
1199
1200 Value *X, *Y;
1201 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1203
1204 return std::nullopt;
1205}
1206
// Like getKnownSign, whose answer is returned directly when it has one;
// otherwise an additional check on an nsw subtraction is attempted.
//
// NOTE(review): listing number 1214 is not visible in this text. It holds the
// statement controlled by the `if` on the nsw-sub match below; as printed
// here the `if` appears to govern `return std::nullopt;`, which is an
// artifact of the missing line. Restore it from the original file.
1207static std::optional<bool> getKnownSignOrZero(Value *Op,
1208 const SimplifyQuery &SQ) {
1209 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1210 return Sign;
1211
1212 Value *X, *Y;
1213 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1215
1216 return std::nullopt;
1217}
1218
1219/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1220static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1221 const SimplifyQuery &SQ) {
1222 std::optional<bool> Known1 = getKnownSign(Op1, SQ);
1223 if (!Known1)
1224 return false;
1225 std::optional<bool> Known0 = getKnownSign(Op0, SQ);
1226 if (!Known0)
1227 return false;
1228 return *Known0 == *Known1;
1229}
1230
1231// Determines if ldexp(ldexp(x, a), b) -> ldexp(x, sadd.sat(a, b)) is safe.
1232//
1233// This is true if, when the add saturates, the resulting ldexp is guaranteed to
1234// produce 0 or inf.
//
// The check scales the smallest value of the format by the largest exponent
// the (capped) exponent type can hold and the largest value by the smallest
// exponent, and requires the results to be infinity and zero respectively.
// Formats without an infinity are rejected up front.
//
// NOTE(review): listing numbers 1246 and 1248 are not visible in this text;
// they carry the trailing argument(s) of the two scalbn calls. Restore them
// from the original file before relying on this listing.
1235static bool ldexpSaturatingAddIsSafe(Type *FpTy, Type *ExpTy) {
1236 const fltSemantics &FltSem = FpTy->getScalarType()->getFltSemantics();
1237 if (!APFloat::semanticsHasInf(FltSem))
1238 return false;
1239
1240 // Cap ExpBits at 32 because scalbn takes an int. This is sufficient for any
1241 // reasonable fp type (for example, `double` only has 11 exponent bits).
1242 unsigned ExpBits = std::min(ExpTy->getScalarSizeInBits(), 32u);
1243 int SignedMax = static_cast<int>(maxIntN(ExpBits));
1244 int SignedMin = static_cast<int>(minIntN(ExpBits));
1245 APFloat ScaledUp = scalbn(APFloat::getSmallest(FltSem), SignedMax,
1247 APFloat ScaledDown = scalbn(APFloat::getLargest(FltSem), SignedMin,
1249 return ScaledUp.isInfinity() && ScaledDown.isZero();
1250}
1251
1252/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1253/// can trigger other combines.
///
/// Requires the add to have a single use and to carry the no-wrap flag that
/// matches the signedness of the min/max (nsw for smin/smax, nuw for
/// umin/umax). Returns the new add, or nullptr if the pattern does not match.
///
/// NOTE(review): listing number 1254 (the line with the function name and its
/// first parameter) is not visible in this text.
1255 InstCombiner::BuilderTy &Builder) {
1256 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1257 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
1258 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
1259 "Expected a min or max intrinsic");
1260
1261 // TODO: Match vectors with undef elements, but undef may not propagate.
1262 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1263 Value *X;
1264 const APInt *C0, *C1;
1265 if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
1266 !match(Op1, m_APInt(C1)))
1267 return nullptr;
1268
1269 // Check for necessary no-wrap and overflow constraints.
1270 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
1271 auto *Add = cast<BinaryOperator>(Op0);
1272 if ((IsSigned && !Add->hasNoSignedWrap()) ||
1273 (!IsSigned && !Add->hasNoUnsignedWrap()))
1274 return nullptr;
1275
1276 // If the constant difference overflows, then instsimplify should reduce the
1277 // min/max to the add or C1.
1278 bool Overflow;
1279 APInt CDiff =
1280 IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
1281 assert(!Overflow && "Expected simplify of min/max");
1282
1283 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1284 // Note: the "mismatched" no-overflow setting does not propagate.
1285 Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
1286 Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
1287 return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
1288 : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
1289}
1290/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1291Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1292 Type *Ty = MinMax1.getType();
1293
1294 // We are looking for a tree of:
1295 // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1296 // Where the min and max could be reversed
1297 Instruction *MinMax2;
1298 BinaryOperator *AddSub;
1299 const APInt *MinValue, *MaxValue;
1300 if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
1301 if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
1302 return nullptr;
1303 } else if (match(&MinMax1,
1304 m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
1305 if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
1306 return nullptr;
1307 } else
1308 return nullptr;
1309
1310 // Check that the constants clamp a saturate, and that the new type would be
1311 // sensible to convert to.
1312 if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
1313 return nullptr;
1314 // In what bitwidth can this be treated as saturating arithmetics?
1315 unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
1316 // FIXME: This isn't quite right for vectors, but using the scalar type is a
1317 // good first approximation for what should be done there.
1318 if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
1319 return nullptr;
1320
1321 // Also make sure that the inner min/max and the add/sub have one use.
1322 if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
1323 return nullptr;
1324
1325 // Create the new type (which can be a vector type)
1326 Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1327
1328 Intrinsic::ID IntrinsicID;
1329 if (AddSub->getOpcode() == Instruction::Add)
1330 IntrinsicID = Intrinsic::sadd_sat;
1331 else if (AddSub->getOpcode() == Instruction::Sub)
1332 IntrinsicID = Intrinsic::ssub_sat;
1333 else
1334 return nullptr;
1335
1336 // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1337 // is usually achieved via a sext from a smaller type.
1338 if (ComputeMaxSignificantBits(AddSub->getOperand(0), AddSub) > NewBitWidth ||
1339 ComputeMaxSignificantBits(AddSub->getOperand(1), AddSub) > NewBitWidth)
1340 return nullptr;
1341
1342 // Finally create and return the sat intrinsic, truncated to the new type
1343 Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
1344 Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
1345 Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
1346 return CastInst::Create(Instruction::SExt, Sat, Ty);
1347}
1348
1349
1350/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1351/// can only be one of two possible constant values -- turn that into a select
1352/// of constants.
///
/// Requires operand 1 to be a constant and operand 0 to have a single use.
/// Returns the new select, or nullptr if the pattern does not match.
///
/// NOTE(review): listing numbers 1353 and 1361 are not visible in this text.
/// They hold the line with the function name and first parameter, and the
/// declaration of `Pred` (which the check after the switch implies starts out
/// as CmpInst::BAD_ICMP_PREDICATE -- confirm against the original file).
1354 InstCombiner::BuilderTy &Builder) {
1355 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1356 Value *X;
1357 const APInt *C0, *C1;
1358 if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
1359 return nullptr;
1360
 // In every case the inner and outer constants must differ by exactly one,
 // so that only two result values are possible.
1362 switch (II->getIntrinsicID()) {
1363 case Intrinsic::smax:
1364 if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1365 Pred = ICmpInst::ICMP_SGT;
1366 break;
1367 case Intrinsic::smin:
1368 if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1369 Pred = ICmpInst::ICMP_SLT;
1370 break;
1371 case Intrinsic::umax:
1372 if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1373 Pred = ICmpInst::ICMP_UGT;
1374 break;
1375 case Intrinsic::umin:
1376 if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1377 Pred = ICmpInst::ICMP_ULT;
1378 break;
1379 default:
1380 llvm_unreachable("Expected min/max intrinsic");
1381 }
1382 if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1383 return nullptr;
1384
1385 // max (min X, 42), 41 --> X > 41 ? 42 : 41
1386 // min (max X, 42), 43 --> X < 43 ? 42 : 43
1387 Value *Cmp = Builder.CreateICmp(Pred, X, I1);
1388 return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
1389}
1390
1391/// If this min/max has a constant operand and an operand that is a matching
1392/// min/max with a constant operand, constant-fold the 2 constant operands.
///
/// The result reuses the inner intrinsic's ID and the inner non-constant
/// operand. Returns nullptr when operand 0 is not a min/max intrinsic, when
/// either constant operand fails the m_ImmConstant match, or when the
/// inner/outer kinds are not a supported combination.
///
/// NOTE(review): listing numbers 1393 and 1417 are not visible in this text.
/// They hold the line with the function name and first parameter, and the
/// definition of `Pred` used by the compare below. Restore them from the
/// original file before relying on this listing.
1394 IRBuilderBase &Builder,
1395 const SimplifyQuery &SQ) {
1396 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1397 auto *LHS = dyn_cast<MinMaxIntrinsic>(II->getArgOperand(0));
1398 if (!LHS)
1399 return nullptr;
1400
1401 Constant *C0, *C1;
1402 if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
1403 !match(II->getArgOperand(1), m_ImmConstant(C1)))
1404 return nullptr;
1405
1406 // max (max X, C0), C1 --> max X, (max C0, C1)
1407 // min (min X, C0), C1 --> min X, (min C0, C1)
1408 // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1409 // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
1410 Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
 // Mixed signedness is only accepted for the two combinations listed
 // above, and only when both constants are known non-negative.
1411 if (InnerMinMaxID != MinMaxID &&
1412 !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
1413 (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1414 isKnownNonNegative(C0, SQ) && isKnownNonNegative(C1, SQ)))
1415 return nullptr;
1416
 // The merged constant is built as a compare + select on the two constants.
1418 Value *CondC = Builder.CreateICmp(Pred, C0, C1);
1419 Value *NewC = Builder.CreateSelect(CondC, C0, C1);
1420 return Builder.CreateIntrinsic(InnerMinMaxID, II->getType(),
1421 {LHS->getArgOperand(0), NewC});
1422}
1423
1424/// If this min/max has a matching min/max operand with a constant, try to push
1425/// the constant operand into this instruction. This can enable more folds.
///
/// NOTE(review): listing numbers 1427, 1433, 1435, 1443 and 1447 are not
/// visible in this text. That hides the line with the function name and first
/// parameter, most of the pattern that captures Inner/X/C/Y, the last clause
/// of the bail-out condition, and the start of the statement that defines
/// `MinMax`. Only the visible parts are described here; restore the rest from
/// the original file.
1426static Instruction *
1428 InstCombiner::BuilderTy &Builder) {
1429 // Match and capture a min/max operand candidate.
1430 Value *X, *Y;
1431 Constant *C;
1432 Instruction *Inner;
1434 m_Instruction(Inner),
1436 m_Value(Y))))
1437 return nullptr;
1438
1439 // The inner op must match. Check for constants to avoid infinite loops.
1440 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1441 auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
1442 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
1444 return nullptr;
1445
1446 // max (max X, C), Y --> max (max X, Y), C
1448 MinMaxID, II->getType());
1449 Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
1450 NewInner->takeName(Inner);
1451 return CallInst::Create(MinMax, {NewInner, C});
1452}
1453
1454/// Reduce a sequence of min/max intrinsics with a common operand.
///
/// Both operands of \p II must be intrinsics of the same kind as II, and at
/// least one of them must have a single use. When the two inner calls share
/// an operand, the result is a new call that keeps one inner call and takes
/// the other inner call's remaining operand as its second argument. Returns
/// nullptr when no common operand is found.
///
/// NOTE(review): listing number 1455 (the line declaring this function) is
/// not visible in this text.
1456 // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1457 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1458 auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
1459 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1460 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1461 RHS->getIntrinsicID() != MinMaxID ||
1462 (!LHS->hasOneUse() && !RHS->hasOneUse()))
1463 return nullptr;
1464
1465 Value *A = LHS->getArgOperand(0);
1466 Value *B = LHS->getArgOperand(1);
1467 Value *C = RHS->getArgOperand(0);
1468 Value *D = RHS->getArgOperand(1);
1469
1470 // Look for a common operand.
1471 Value *MinMaxOp = nullptr;
1472 Value *ThirdOp = nullptr;
1473 if (LHS->hasOneUse()) {
1474 // If the LHS is only used in this chain and the RHS is used outside of it,
1475 // reuse the RHS min/max because that will eliminate the LHS.
1476 if (D == A || C == A) {
1477 // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1478 // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1479 MinMaxOp = RHS;
1480 ThirdOp = B;
1481 } else if (D == B || C == B) {
1482 // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1483 // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1484 MinMaxOp = RHS;
1485 ThirdOp = A;
1486 }
1487 } else {
1488 assert(RHS->hasOneUse() && "Expected one-use operand");
1489 // Reuse the LHS. This will eliminate the RHS.
1490 if (D == A || D == B) {
1491 // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1492 // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1493 MinMaxOp = LHS;
1494 ThirdOp = C;
1495 } else if (C == A || C == B) {
1496 // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1497 // min(min(a, b), min(a, d)) --> min(min(a, b), d)
1498 MinMaxOp = LHS;
1499 ThirdOp = D;
1500 }
1501 }
1502
1503 if (!MinMaxOp || !ThirdOp)
1504 return nullptr;
1505
1506 Module *Mod = II->getModule();
1507 Function *MinMax =
1508 Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
1509 return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1510}
1511
1512/// If all arguments of the intrinsic are unary shuffles with the same mask,
1513/// try to shuffle after the intrinsic.
///
/// Arguments in positions that isVectorIntrinsicWithScalarOpAtArg reports as
/// scalar are passed through unchanged; constant arguments are accepted only
/// if unshuffleConstant can produce a pre-shuffle constant for them.
///
/// NOTE(review): listing numbers 1514, 1515 and 1539 are not visible in this
/// text. They hold the function's declaration lines and the declaration of
/// `NewArgs`. Restore them from the original file before relying on this
/// listing.
1516 if (!II->getType()->isVectorTy() ||
1517 !isTriviallyVectorizable(II->getIntrinsicID()) ||
1518 !II->getCalledFunction()->isSpeculatable())
1519 return nullptr;
1520
1521 Value *X;
1522 Constant *C;
1523 ArrayRef<int> Mask;
 // The first argument that is neither a constant nor a scalar operand
 // supplies the reference mask and source type.
1524 auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1525 return isa<Constant>(Arg.get()) ||
1526 isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1527 Arg.getOperandNo(), nullptr);
1528 });
1529 if (!NonConstArg ||
1530 !match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
1531 return nullptr;
1532
1533 // At least 1 operand must be a shuffle with 1 use because we are creating 2
1534 // instructions.
1535 if (none_of(II->args(), match_fn(m_OneUse(m_Shuffle(m_Value(), m_Value())))))
1536 return nullptr;
1537
1538 // See if all arguments are shuffled with the same mask.
1540 Type *SrcTy = X->getType();
1541 for (Use &Arg : II->args()) {
1542 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1543 Arg.getOperandNo(), nullptr))
1544 NewArgs.push_back(Arg);
1545 else if (match(&Arg,
1546 m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1547 X->getType() == SrcTy)
1548 NewArgs.push_back(X);
1549 else if (match(&Arg, m_ImmConstant(C))) {
1550 // If it's a constant, try find the constant that would be shuffled to C.
1551 if (Constant *ShuffledC =
1552 unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
1553 NewArgs.push_back(ShuffledC);
1554 else
1555 return nullptr;
1556 } else
1557 return nullptr;
1558 }
1559
1560 // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
 // FPI is II itself for floating-point math operators and null otherwise.
1561 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1562 // Result type might be a different vector width.
1563 // TODO: Check that the result type isn't widened?
1564 VectorType *ResTy =
1565 VectorType::get(II->getType()->getScalarType(), cast<VectorType>(SrcTy));
1566 Value *NewIntrinsic =
1567 Builder.CreateIntrinsic(ResTy, II->getIntrinsicID(), NewArgs, FPI);
1568 return new ShuffleVectorInst(NewIntrinsic, Mask);
1569}
1570
1571/// If all arguments of the intrinsic are reverses, try to pull the reverse
1572/// after the intrinsic.
///
/// Besides reversed vectors, the loop below also accepts scalar-operand
/// positions and splat values (both passed through unchanged) and immediate
/// constants (which are reversed). Any other argument makes the fold bail out
/// with nullptr.
///
/// NOTE(review): listing number 1573 (the line declaring this function) is
/// not visible in this text.
1574 if (!II->getType()->isVectorTy() ||
1575 !isTriviallyVectorizable(II->getIntrinsicID()))
1576 return nullptr;
1577
1578 // At least 1 operand must be a reverse with 1 use because we are creating 2
1579 // instructions.
1580 if (none_of(II->args(), [](Value *V) {
1581 return match(V, m_OneUse(m_VecReverse(m_Value())));
1582 }))
1583 return nullptr;
1584
1585 Value *X;
1586 Constant *C;
1587 SmallVector<Value *> NewArgs;
1588 for (Use &Arg : II->args()) {
1589 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1590 Arg.getOperandNo(), nullptr))
1591 NewArgs.push_back(Arg);
1592 else if (match(&Arg, m_VecReverse(m_Value(X))))
1593 NewArgs.push_back(X);
1594 else if (isSplatValue(Arg))
1595 NewArgs.push_back(Arg);
1596 else if (match(&Arg, m_ImmConstant(C)))
1597 NewArgs.push_back(Builder.CreateVectorReverse(C));
1598 else
1599 return nullptr;
1600 }
1601
1602 // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
 // FPI is II itself for floating-point math operators and null otherwise.
1603 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1604 Value *NewIntrinsic = Builder.CreateIntrinsic(
1605 II->getType(), II->getIntrinsicID(), NewArgs, FPI);
1606 return Builder.CreateVectorReverse(NewIntrinsic);
1607}
1608
1609/// Fold the following cases; works for both bswap and bitreverse intrinsics:
1610/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1611/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
///
/// NOTE(review): listing numbers 1613, 1621, 1622 and 1624 are not visible in
/// this text. That hides the line with the function name and first parameter,
/// the `if` that matches the bitwise logic operation and binds X/Y, and the
/// definition of `Op`; the extra closing brace near the end belongs to that
/// hidden `if`. Restore them from the original file before relying on this
/// listing.
1612template <Intrinsic::ID IntrID>
1614 InstCombiner::BuilderTy &Builder) {
1615 static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
1616 "This helper only supports BSWAP and BITREVERSE intrinsics");
1617
1618 Value *X, *Y;
1619 // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1620 // don't match ConstantExpr that aren't meaningful for this transform.
1623 Value *OldReorderX, *OldReorderY;
1625
1626 // If both X and Y are bswap/bitreverse, the transform reduces the number
1627 // of instructions even if there's multiuse.
1628 // If only one operand is bswap/bitreverse, we need to ensure the operand
1629 // have only one use.
1630 if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
1631 match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
1632 return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
1633 }
1634
 // Only X is reordered: move the reorder onto Y instead.
1635 if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
1636 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
1637 return BinaryOperator::Create(Op, OldReorderX, NewReorder);
1638 }
1639
 // Only Y is reordered: move the reorder onto X instead.
1640 if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
1641 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
1642 return BinaryOperator::Create(Op, NewReorder, OldReorderY);
1643 }
1644 }
1645 return nullptr;
1646}
1647
1648/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1649/// `f(f(x, y), y) == f(x, y)` holds.
///
/// Returns true for the integer and floating-point min/max intrinsic IDs
/// listed in the switch and false for every other ID.
///
/// NOTE(review): listing number 1650 (the line declaring this function and its
/// `IID` parameter) is not visible in this text.
1651 switch (IID) {
1652 case Intrinsic::smax:
1653 case Intrinsic::smin:
1654 case Intrinsic::umax:
1655 case Intrinsic::umin:
1656 case Intrinsic::maximum:
1657 case Intrinsic::minimum:
1658 case Intrinsic::maximumnum:
1659 case Intrinsic::minimumnum:
1660 case Intrinsic::maxnum:
1661 case Intrinsic::minnum:
1662 return true;
1663 default:
1664 return false;
1665 }
1666}
1667
1668/// Attempt to simplify value-accumulating recurrences of kind:
1669/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1670/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1671/// And let the idempotent binary intrinsic be hoisted, when the operands are
1672/// known to be loop-invariant.
///
/// On success returns a new call of the same intrinsic on (Init, OtherOp),
/// with II's fast-math flags copied over when the new value is a
/// floating-point math operator; otherwise returns nullptr.
///
/// NOTE(review): listing numbers 1673 and 1682 are not visible in this text.
/// They hold the line with the function name and first parameter, and the
/// clause of the condition that binds PN, Init and OtherOp. Restore them from
/// the original file before relying on this listing.
1674 IntrinsicInst *II) {
1675 PHINode *PN;
1676 Value *Init, *OtherOp;
1677
1678 // A binary intrinsic recurrence with loop-invariant operands is equivalent to
1679 // `call @llvm.binary.intrinsic(Init, OtherOp)`.
1680 auto IID = II->getIntrinsicID();
1681 if (!isIdempotentBinaryIntrinsic(IID) ||
1683 !IC.getDominatorTree().dominates(OtherOp, PN))
1684 return nullptr;
1685
1686 auto *InvariantBinaryInst =
1687 IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
1688 if (isa<FPMathOperator>(InvariantBinaryInst))
1689 cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
1690 return InvariantBinaryInst;
1691}
1692
1693static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1694 if (!CanReorderLanes)
1695 return nullptr;
1696
1697 Value *V;
1698 if (match(Arg, m_VecReverse(m_Value(V))))
1699 return V;
1700
1701 ArrayRef<int> Mask;
1702 if (!isa<FixedVectorType>(Arg->getType()) ||
1703 !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
1704 !cast<ShuffleVectorInst>(Arg)->isSingleSource())
1705 return nullptr;
1706
1707 int Sz = Mask.size();
1708 SmallBitVector UsedIndices(Sz);
1709 for (int Idx : Mask) {
1710 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1711 return nullptr;
1712 UsedIndices.set(Idx);
1713 }
1714
1715 // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
1716 // other changes.
1717 return UsedIndices.all() ? V : nullptr;
1718}
1719
1720/// Fold an unsigned minimum of trailing or leading zero bits counts:
1721/// umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
1722/// umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
1723/// >> ConstOp))
1724/// umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 | CtOp2)
1725/// umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 | CtOp2)
///
/// \p I0 must be a single-use cttz/ctlz call. \p I1 is either another
/// single-use call of the same intrinsic, or a constant whose element values
/// are all below the bit width. Returns nullptr otherwise.
///
/// NOTE(review): listing numbers 1728 and 1754 are not visible in this text.
/// They hold the line with the function name and first parameters, and the
/// start of the statement that defines `NewConst`. Restore them from the
/// original file before relying on this listing.
1726template <Intrinsic::ID IntrID>
1727static Value *
1729 const DataLayout &DL,
1730 InstCombiner::BuilderTy &Builder) {
1731 static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1732 "This helper only supports cttz and ctlz intrinsics");
1733
1734 Value *CtOp1, *CtOp2;
1735 Value *ZeroUndef1, *ZeroUndef2;
1736 if (!match(I0, m_OneUse(
1737 m_Intrinsic<IntrID>(m_Value(CtOp1), m_Value(ZeroUndef1)))))
1738 return nullptr;
1739
 // Two counts: OR the counted values and OR the two flag operands.
1740 if (match(I1,
1741 m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp2), m_Value(ZeroUndef2)))))
1742 return Builder.CreateBinaryIntrinsic(
1743 IntrID, Builder.CreateOr(CtOp1, CtOp2),
1744 Builder.CreateOr(ZeroUndef1, ZeroUndef2));
1745
1746 unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1747 auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1748 if (!match(I1, m_CheckedInt(LessBitWidth)))
1749 // We have a constant >= BitWidth (which can be handled by CVP)
1750 // or a non-splat vector with elements < and >= BitWidth
1751 return nullptr;
1752
1753 Type *Ty = I1->getType();
 // cttz shifts the constant 1 left by I1; ctlz logically shifts the
 // signed-minimum value right by I1.
1755 IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1756 IntrID == Intrinsic::cttz
1757 ? ConstantInt::get(Ty, 1)
1758 : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1759 cast<Constant>(I1), DL);
1760 return Builder.CreateBinaryIntrinsic(
1761 IntrID, Builder.CreateOr(CtOp1, NewConst),
1762 ConstantInt::getTrue(ZeroUndef1->getType()));
1763}
1764
1765/// Return whether "X LOp (Y ROp Z)" is always equal to
1766/// "(X LOp Y) ROp (X LOp Z)".
///
/// Accepted combinations in the visible code: add nuw or shl nuw over
/// umax/umin, and add nsw over smax/smin. Everything else yields false.
///
/// NOTE(review): listing number 1767 (the line with the function name and its
/// first parameters, including LOp and HasNUW) is not visible in this text.
1768 bool HasNSW, Intrinsic::ID ROp) {
1769 switch (ROp) {
1770 case Intrinsic::umax:
1771 case Intrinsic::umin:
1772 if (HasNUW && LOp == Instruction::Add)
1773 return true;
1774 if (HasNUW && LOp == Instruction::Shl)
1775 return true;
1776 return false;
1777 case Intrinsic::smax:
1778 case Intrinsic::smin:
1779 return HasNSW && LOp == Instruction::Add;
1780 default:
1781 return false;
1782 }
1783}
1784
1785/// Return whether "(X ROp Y) LOp Z" is always equal to
1786/// "(X LOp Z) ROp (Y LOp Z)".
///
/// Takes the same inputs as leftDistributesOverRight and forwards to it for
/// commutative opcodes and for shl.
///
/// NOTE(review): listing line 1787 (the start of the signature) is absent
/// from this copy; LOp and HasNUW are presumably declared there.
1788 bool HasNSW, Intrinsic::ID ROp) {
     // Commutative opcodes and shl reuse the left-distribution table.
1789 if (Instruction::isCommutative(LOp) || LOp == Instruction::Shl)
1790 return leftDistributesOverRight(LOp, HasNUW, HasNSW, ROp);
     // Beyond that, only sub qualifies, and only with the wrap flag that
     // matches the signedness of the min/max.
1791 switch (ROp) {
1792 case Intrinsic::umax:
1793 case Intrinsic::umin:
1794 return HasNUW && LOp == Instruction::Sub;
1795 case Intrinsic::smax:
1796 case Intrinsic::smin:
1797 return HasNSW && LOp == Instruction::Sub;
1798 default:
1799 return false;
1800 }
1801}
1802
1803// Attempts to factorise a common term
1804// in an instruction that has the form "(A op' B) op (C op' D)"
1805// where op is an intrinsic and op' is a binop
//
// On success, returns the new outer binop built through Builder, whose
// nuw/nsw flags are set only if both original inner binops carried them.
// Returns nullptr when the pattern does not apply.
//
// NOTE(review): listing line 1807 (the helper's name and first parameter)
// is absent from this copy; II is presumably declared there.
1806static Value *
1808 InstCombiner::BuilderTy &Builder) {
1809 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1810 Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1811
     // NOTE(review): listing lines 1812-1813 are absent. They presumably
     // define Op0 and Op1 from LHS and RHS (as nullable casts, given the
     // null check below); confirm against the real file.
1814
1815 if (!Op0 || !Op1)
1816 return nullptr;
1817
     // Both inner operations must use the same opcode.
1818 if (Op0->getOpcode() != Op1->getOpcode())
1819 return nullptr;
1820
     // Both inner operations must be single-use.
1821 if (!Op0->hasOneUse() || !Op1->hasOneUse())
1822 return nullptr;
1823
1824 Instruction::BinaryOps InnerOpcode =
1825 static_cast<Instruction::BinaryOps>(Op0->getOpcode());
     // A wrap flag counts only when it is present on both inner operations.
1826 bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1827 bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1828
1829 Value *A = Op0->getOperand(0);
1830 Value *B = Op0->getOperand(1);
1831 Value *C = Op1->getOperand(0);
1832 Value *D = Op1->getOperand(1);
1833
1834 // Attempts to swap variables such that A equals C or B equals D,
1835 // if the inner operation is commutative.
1836 if (Op0->isCommutative() && A != C && B != D) {
1837 if (A == D || B == C)
1838 std::swap(C, D);
1839 else
1840 return nullptr;
1841 }
1842
1843 BinaryOperator *NewBinop;
     // Shared left operand:  (A op' B) op (A op' D) --> A op' (B op D).
1844 if (A == C &&
1845 leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode)) {
1846 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
1847 NewBinop =
1848 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, A, NewIntrinsic));
     // Shared right operand: (A op' B) op (C op' B) --> (A op C) op' B.
1849 } else if (B == D && rightDistributesOverLeft(InnerOpcode, HasNUW, HasNSW,
1850 TopLevelOpcode)) {
1851 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
1852 NewBinop =
1853 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
1854 } else {
1855 return nullptr;
1856 }
1857
     // Carry the common wrap flags over to the new outer binop.
1858 NewBinop->setHasNoUnsignedWrap(HasNUW);
1859 NewBinop->setHasNoSignedWrap(HasNSW);
1860
1861 return NewBinop;
1862}
1863
/// Rewrite a shift intrinsic whose shift amount (argument 1) is a constant
/// into a plain IR shift of argument 0:
///   - shl when every shift amount is non-negative;
///   - otherwise a right shift by the negated amount: ashr when the intrinsic
///     ID is arm_neon_vshifts or aarch64_neon_sshl, lshr for any other ID.
/// Returns the result of replacing II's uses, or nullptr when the amount is
/// not a constant, mixes signs, or has an element out of range.
///
/// NOTE(review): listing line 1864 (the signature) is absent from this copy;
/// II and IC are presumably its parameters.
1865 Value *Arg0 = II->getArgOperand(0);
1866 auto *ShiftConst = dyn_cast<Constant>(II->getArgOperand(1));
1867 if (!ShiftConst)
1868 return nullptr;
1869
1870 int ElemBits = Arg0->getType()->getScalarSizeInBits();
     // Both flags start true and are cleared by Check as elements are seen.
     // Note that "positive" here includes zero (see isNonNegative below).
1871 bool AllPositive = true;
1872 bool AllNegative = true;
1873
     // Validate one shift-amount element. Returns false if it is not a
     // ConstantInt (including a null element), if its sign disagrees with
     // an earlier element, or if it is out of range: a non-negative amount
     // must be below ElemBits, a negative one must be greater than -ElemBits.
1874 auto Check = [&](Constant *C) -> bool {
1875 if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
1876 const APInt &V = CI->getValue();
1877 if (V.isNonNegative()) {
1878 AllNegative = false;
1879 return AllPositive && V.ult(ElemBits);
1880 }
1881 AllPositive = false;
1882 return AllNegative && V.sgt(-ElemBits);
1883 }
1884 return false;
1885 };
1886
     // Fixed-width vectors are checked element by element; anything else is
     // checked as a single constant.
1887 if (auto *VTy = dyn_cast<FixedVectorType>(Arg0->getType())) {
1888 for (unsigned I = 0, E = VTy->getNumElements(); I < E; ++I) {
1889 if (!Check(ShiftConst->getAggregateElement(I)))
1890 return nullptr;
1891 }
1892
1893 } else if (!Check(ShiftConst))
1894 return nullptr;
1895
1896 IRBuilderBase &B = IC.Builder;
     // Every amount is non-negative: a plain left shift.
1897 if (AllPositive)
1898 return IC.replaceInstUsesWith(*II, B.CreateShl(Arg0, ShiftConst));
1899
     // Every amount is negative: shift right by the negated amount.
1900 Value *NegAmt = B.CreateNeg(ShiftConst);
1901 Intrinsic::ID IID = II->getIntrinsicID();
1902 const bool IsSigned =
1903 IID == Intrinsic::arm_neon_vshifts || IID == Intrinsic::aarch64_neon_sshl;
1904 Value *Result =
1905 IsSigned ? B.CreateAShr(Arg0, NegAmt) : B.CreateLShr(Arg0, NegAmt);
1906 return IC.replaceInstUsesWith(*II, Result);
1907}
1908
1909/// CallInst simplification. This mostly only handles folding of intrinsic
1910/// instructions. For normal calls, it allows visitCallBase to do the heavy
1911/// lifting.
1913 // Don't try to simplify calls without uses. It will not do anything useful,
1914 // but will result in the following folds being skipped.
1915 if (!CI.use_empty()) {
1916 SmallVector<Value *, 8> Args(CI.args());
1917 if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
1918 SQ.getWithInstruction(&CI)))
1919 return replaceInstUsesWith(CI, V);
1920 }
1921
1922 if (Value *FreedOp = getFreedOperand(&CI, &TLI))
1923 return visitFree(CI, FreedOp);
1924
1925 // If the caller function (i.e. us, the function that contains this CallInst)
1926 // is nounwind, mark the call as nounwind, even if the callee isn't.
1927 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1928 CI.setDoesNotThrow();
1929 return &CI;
1930 }
1931
1933 if (!II)
1934 return visitCallBase(CI);
1935
1936 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1937 // instead of in visitCallBase.
1938 if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1939 if (auto NumBytes = MI->getLengthInBytes()) {
1940 // memmove/cpy/set of zero bytes is a noop.
1941 if (NumBytes->isZero())
1942 return eraseInstFromFunction(CI);
1943
1944 // For atomic unordered mem intrinsics, if len is not positive or is
1945 // not a multiple of the element size, then behavior is undefined.
1946 if (MI->isAtomic() &&
1947 (NumBytes->isNegative() ||
1948 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1950 assert(MI->getType()->isVoidTy() &&
1951 "non void atomic unordered mem intrinsic");
1952 return eraseInstFromFunction(*MI);
1953 }
1954 }
1955
1956 // No other transformations apply to volatile transfers.
1957 if (MI->isVolatile())
1958 return nullptr;
1959
1961 // memmove(x,x,size) -> noop.
1962 if (MTI->getSource() == MTI->getDest())
1963 return eraseInstFromFunction(CI);
1964 }
1965
1966 auto IsPointerUndefined = [MI](Value *Ptr) {
1967 return isa<ConstantPointerNull>(Ptr) &&
1969 MI->getFunction(),
1970 cast<PointerType>(Ptr->getType())->getAddressSpace());
1971 };
1972 bool SrcIsUndefined = false;
1973 // If we can determine a pointer alignment that is bigger than currently
1974 // set, update the alignment.
1975 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1977 return I;
1978 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1979 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1980 if (Instruction *I = SimplifyAnyMemSet(MSI))
1981 return I;
1982 }
1983
1984 // If src/dest is null, this memory intrinsic must be a noop.
1985 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1986 Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
1987 return eraseInstFromFunction(CI);
1988 }
1989
1990 // If we have a memmove and the source operation is a constant global,
1991 // then the source and dest pointers can't alias, so we can change this
1992 // into a call to memcpy.
1993 if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1994 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1995 if (GVSrc->isConstant()) {
1996 Module *M = CI.getModule();
1997 Intrinsic::ID MemCpyID =
1998 MMI->isAtomic()
1999 ? Intrinsic::memcpy_element_unordered_atomic
2000 : Intrinsic::memcpy;
2001 Type *Tys[3] = { CI.getArgOperand(0)->getType(),
2002 CI.getArgOperand(1)->getType(),
2003 CI.getArgOperand(2)->getType() };
2005 Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
2006 return II;
2007 }
2008 }
2009 }
2010
2011 // For fixed width vector result intrinsics, use the generic demanded vector
2012 // support.
2013 if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
2014 auto VWidth = IIFVTy->getNumElements();
2015 APInt PoisonElts(VWidth, 0);
2016 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2017 if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
2018 if (V != II)
2019 return replaceInstUsesWith(*II, V);
2020 return II;
2021 }
2022 }
2023
2024 if (II->isCommutative()) {
2025 if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) {
2026 replaceOperand(*II, 0, Pair->first);
2027 replaceOperand(*II, 1, Pair->second);
2028 return II;
2029 }
2030
2031 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
2032 return NewCall;
2033 }
2034
2035 // Unused constrained FP intrinsic calls may have declared side effect, which
2036 // prevents it from being removed. In some cases however the side effect is
2037 // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
2038 // returns a replacement, the call may be removed.
2039 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
2040 if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
2041 return eraseInstFromFunction(CI);
2042 }
2043
2044 Intrinsic::ID IID = II->getIntrinsicID();
2045 switch (IID) {
2046 case Intrinsic::objectsize: {
2047 SmallVector<Instruction *> InsertedInstructions;
2048 if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
2049 &InsertedInstructions)) {
2050 for (Instruction *Inserted : InsertedInstructions)
2051 Worklist.add(Inserted);
2052 return replaceInstUsesWith(CI, V);
2053 }
2054 return nullptr;
2055 }
2056 case Intrinsic::abs: {
2057 Value *IIOperand = II->getArgOperand(0);
2058 bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
2059
2060 // abs(-x) -> abs(x)
2061 Value *X;
2062 if (match(IIOperand, m_Neg(m_Value(X)))) {
2063 if (cast<Instruction>(IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
2064 replaceOperand(*II, 1, Builder.getTrue());
2065 return replaceOperand(*II, 0, X);
2066 }
2067 if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
2068 return replaceOperand(*II, 0, X);
2069
2070 Value *Y;
2071 // abs(a * abs(b)) -> abs(a * b)
2072 if (match(IIOperand,
2075 bool NSW =
2076 cast<Instruction>(IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
2077 auto *XY = NSW ? Builder.CreateNSWMul(X, Y) : Builder.CreateMul(X, Y);
2078 return replaceOperand(*II, 0, XY);
2079 }
2080
2081 if (std::optional<bool> Known =
2082 getKnownSignOrZero(IIOperand, SQ.getWithInstruction(II))) {
2083 // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
2084 // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
2085 if (!*Known)
2086 return replaceInstUsesWith(*II, IIOperand);
2087
2088 // abs(x) -> -x if x < 0
2089 // abs(x) -> -x if x <= 0 (include abs(x-y) --> y - x where x <= y)
2090 if (IntMinIsPoison)
2091 return BinaryOperator::CreateNSWNeg(IIOperand);
2092 return BinaryOperator::CreateNeg(IIOperand);
2093 }
2094
2095 // abs (sext X) --> zext (abs X*)
2096 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
2097 if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
2098 Value *NarrowAbs =
2099 Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
2100 return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
2101 }
2102
2103 // Match a complicated way to check if a number is odd/even:
2104 // abs (srem X, 2) --> and X, 1
2105 const APInt *C;
2106 if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
2107 return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
2108
2109 break;
2110 }
2111 case Intrinsic::umin: {
2112 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2113 // umin(x, 1) == zext(x != 0)
2114 if (match(I1, m_One())) {
2115 assert(II->getType()->getScalarSizeInBits() != 1 &&
2116 "Expected simplify of umin with max constant");
2117 Value *Zero = Constant::getNullValue(I0->getType());
2118 Value *Cmp = Builder.CreateICmpNE(I0, Zero);
2119 return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
2120 }
2121 // umin(cttz(x), const) --> cttz(x | (1 << const))
2122 if (Value *FoldedCttz =
2124 I0, I1, DL, Builder))
2125 return replaceInstUsesWith(*II, FoldedCttz);
2126 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
2127 if (Value *FoldedCtlz =
2129 I0, I1, DL, Builder))
2130 return replaceInstUsesWith(*II, FoldedCtlz);
2131 [[fallthrough]];
2132 }
2133 case Intrinsic::umax: {
2134 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2135 Value *X, *Y;
2136 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
2137 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2138 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2139 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2140 }
2141 Constant *C;
2142 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2143 I0->hasOneUse()) {
2144 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType(), DL)) {
2145 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2146 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2147 }
2148 }
2149 // If C is not 0:
2150 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
2151 // If C is not 0 or 1:
2152 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
2153 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
2154 const APInt *C;
2155 Value *X;
2156 if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
2157 !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
2158 return nullptr;
2159 if (C->isZero())
2160 return nullptr;
2161 if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
2162 return nullptr;
2163
2164 Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
2165 Value *NewSelect = nullptr;
2166 NewSelect = Builder.CreateSelectWithUnknownProfile(
2167 Cmp, ConstantInt::get(X->getType(), 1), A, DEBUG_TYPE);
2168 return replaceInstUsesWith(*II, NewSelect);
2169 };
2170
2171 if (IID == Intrinsic::umax) {
2172 if (Instruction *I = foldMaxMulShift(I0, I1))
2173 return I;
2174 if (Instruction *I = foldMaxMulShift(I1, I0))
2175 return I;
2176 }
2177
2178 // If both operands of unsigned min/max are sign-extended, it is still ok
2179 // to narrow the operation.
2180 [[fallthrough]];
2181 }
2182 case Intrinsic::smax:
2183 case Intrinsic::smin: {
2184 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2185 Value *X, *Y;
2186 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
2187 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2188 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2189 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2190 }
2191
2192 Constant *C;
2193 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2194 I0->hasOneUse()) {
2195 if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType(), DL)) {
2196 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2197 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2198 }
2199 }
2200
2201 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2202 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
2203 const APInt *MinC, *MaxC;
2204 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2205 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2206 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2207 Value *NewMax = Builder.CreateBinaryIntrinsic(
2208 MaxIID, X, ConstantInt::get(X->getType(), *MaxC));
2209 return replaceInstUsesWith(
2210 *II, Builder.CreateBinaryIntrinsic(
2211 MinIID, NewMax, ConstantInt::get(X->getType(), *MinC)));
2212 };
2213 if (IID == Intrinsic::smax &&
2215 m_APInt(MinC)))) &&
2216 match(I1, m_APInt(MaxC)) && MinC->sgt(*MaxC))
2217 return CreateCanonicalClampForm(true);
2218 if (IID == Intrinsic::umax &&
2220 m_APInt(MinC)))) &&
2221 match(I1, m_APInt(MaxC)) && MinC->ugt(*MaxC))
2222 return CreateCanonicalClampForm(false);
2223
2224 // umin(i1 X, i1 Y) -> and i1 X, Y
2225 // smax(i1 X, i1 Y) -> and i1 X, Y
2226 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2227 II->getType()->isIntOrIntVectorTy(1)) {
2228 return BinaryOperator::CreateAnd(I0, I1);
2229 }
2230
2231 // umax(i1 X, i1 Y) -> or i1 X, Y
2232 // smin(i1 X, i1 Y) -> or i1 X, Y
2233 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2234 II->getType()->isIntOrIntVectorTy(1)) {
2235 return BinaryOperator::CreateOr(I0, I1);
2236 }
2237
2238 // smin(smax(X, -1), 1) -> scmp(X, 0)
2239 // smax(smin(X, 1), -1) -> scmp(X, 0)
2240 // At this point, smax(smin(X, 1), -1) has been changed to
2241 // smin(smax(X, -1), 1), and i1's have been changed to and/ors,
2242 // so we only need to check for smin.
2243 if (IID == Intrinsic::smin) {
2244 if (match(I0, m_OneUse(m_SMax(m_Value(X), m_AllOnes()))) &&
2245 match(I1, m_One())) {
2246 Value *Zero = ConstantInt::get(X->getType(), 0);
2247 return replaceInstUsesWith(
2248 CI,
2249 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {X, Zero}));
2250 }
2251 }
2252
2253 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2254 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2255 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2256 // TODO: Canonicalize neg after min/max if I1 is constant.
2257 if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
2258 (I0->hasOneUse() || I1->hasOneUse())) {
2260 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
2261 return BinaryOperator::CreateNSWNeg(InvMaxMin);
2262 }
2263 }
2264
2265 // (umax X, (xor X, Pow2))
2266 // -> (or X, Pow2)
2267 // (umin X, (xor X, Pow2))
2268 // -> (and X, ~Pow2)
2269 // (smax X, (xor X, Pos_Pow2))
2270 // -> (or X, Pos_Pow2)
2271 // (smin X, (xor X, Pos_Pow2))
2272 // -> (and X, ~Pos_Pow2)
2273 // (smax X, (xor X, Neg_Pow2))
2274 // -> (and X, ~Neg_Pow2)
2275 // (smin X, (xor X, Neg_Pow2))
2276 // -> (or X, Neg_Pow2)
2277 if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
2278 match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
2279 isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
2280 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2281 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2282
2283 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2284 auto KnownSign = getKnownSign(X, SQ.getWithInstruction(II));
2285 if (KnownSign == std::nullopt) {
2286 UseOr = false;
2287 UseAndN = false;
2288 } else if (*KnownSign /* true is Signed. */) {
2289 UseOr ^= true;
2290 UseAndN ^= true;
2291 Type *Ty = I0->getType();
2292 // Negative power of 2 must be IntMin. It's possible to be able to
2293 // prove negative / power of 2 without actually having known bits, so
2294 // just get the value by hand.
2296 Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
2297 }
2298 }
2299 if (UseOr)
2300 return BinaryOperator::CreateOr(I0, X);
2301 else if (UseAndN)
2302 return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
2303 }
2304
2305 // If we can eliminate ~A and Y is free to invert:
2306 // max ~A, Y --> ~(min A, ~Y)
2307 //
2308 // Examples:
2309 // max ~A, ~Y --> ~(min A, Y)
2310 // max ~A, C --> ~(min A, ~C)
2311 // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
2312 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2313 Value *A;
2314 if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
2315 !isFreeToInvert(A, A->hasOneUse())) {
2316 if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
2318 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
2319 return BinaryOperator::CreateNot(InvMaxMin);
2320 }
2321 }
2322 return nullptr;
2323 };
2324
2325 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2326 return I;
2327 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2328 return I;
2329
2331 return I;
2332
2333 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
2334 const APInt *RHSC;
2335 if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
2336 match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
2337 return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
2338 ConstantInt::get(II->getType(), *RHSC));
2339
2340 // smax(X, -X) --> abs(X)
2341 // smin(X, -X) --> -abs(X)
2342 // umax(X, -X) --> -abs(X)
2343 // umin(X, -X) --> abs(X)
2344 if (isKnownNegation(I0, I1)) {
2345 // We can choose either operand as the input to abs(), but if we can
2346 // eliminate the only use of a value, that's better for subsequent
2347 // transforms/analysis.
2348 if (I0->hasOneUse() && !I1->hasOneUse())
2349 std::swap(I0, I1);
2350
2351 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2352 // operation and potentially its negation.
2353 bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
2354 Value *Abs = Builder.CreateBinaryIntrinsic(
2355 Intrinsic::abs, I0,
2356 ConstantInt::getBool(II->getContext(), IntMinIsPoison));
2357
2358 // We don't have a "nabs" intrinsic, so negate if needed based on the
2359 // max/min operation.
2360 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2361 Abs = Builder.CreateNeg(Abs, "nabs", IntMinIsPoison);
2362 return replaceInstUsesWith(CI, Abs);
2363 }
2364
2366 return Sel;
2367
2368 if (Instruction *SAdd = matchSAddSubSat(*II))
2369 return SAdd;
2370
2371 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2372 return replaceInstUsesWith(*II, NewMinMax);
2373
2375 return R;
2376
2377 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2378 return NewMinMax;
2379
2380 // Try to fold minmax with constant RHS based on range information
2381 if (match(I1, m_APIntAllowPoison(RHSC))) {
2382 ICmpInst::Predicate Pred =
2384 bool IsSigned = MinMaxIntrinsic::isSigned(IID);
2386 I0, IsSigned, SQ.getWithInstruction(II));
2387 if (!LHS_CR.isFullSet()) {
2388 if (LHS_CR.icmp(Pred, *RHSC))
2389 return replaceInstUsesWith(*II, I0);
2390 if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
2391 return replaceInstUsesWith(*II,
2392 ConstantInt::get(II->getType(), *RHSC));
2393 }
2394 }
2395
2397 return replaceInstUsesWith(*II, V);
2398
2399 break;
2400 }
2401 case Intrinsic::scmp: {
2402 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2403 Value *LHS, *RHS;
2404 if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
2405 return replaceInstUsesWith(
2406 CI,
2407 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
2408 break;
2409 }
2410 case Intrinsic::bitreverse: {
2411 Value *IIOperand = II->getArgOperand(0);
2412 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2413 Value *X;
2414 if (match(IIOperand, m_ZExt(m_Value(X))) &&
2415 X->getType()->isIntOrIntVectorTy(1)) {
2416 Type *Ty = II->getType();
2417 APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
2418 return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
2420 }
2421
2422 if (Instruction *crossLogicOpFold =
2424 return crossLogicOpFold;
2425
2426 break;
2427 }
2428 case Intrinsic::bswap: {
2429 Value *IIOperand = II->getArgOperand(0);
2430
2431 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2432 // inverse-shift-of-bswap:
2433 // bswap (shl X, Y) --> lshr (bswap X), Y
2434 // bswap (lshr X, Y) --> shl (bswap X), Y
2435 Value *X, *Y;
2436 if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
2437 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2439 Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
2440 BinaryOperator::BinaryOps InverseShift =
2441 cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
2442 ? Instruction::LShr
2443 : Instruction::Shl;
2444 return BinaryOperator::Create(InverseShift, NewSwap, Y);
2445 }
2446 }
2447
2448 KnownBits Known = computeKnownBits(IIOperand, II);
2449 uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
2450 uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
2451 unsigned BW = Known.getBitWidth();
2452
2453 // bswap(x) -> shift(x) if x has exactly one "active byte"
2454 if (BW - LZ - TZ == 8) {
2455 assert(LZ != TZ && "active byte cannot be in the middle");
2456 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2457 return BinaryOperator::CreateNUWShl(
2458 IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
2459 // -> lshr(x) if the "active byte" is in the high part of x
2460 return BinaryOperator::CreateExactLShr(
2461 IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
2462 }
2463
2464 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2465 if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
2466 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2467 Value *CV = ConstantInt::get(X->getType(), C);
2468 Value *V = Builder.CreateLShr(X, CV);
2469 return new TruncInst(V, IIOperand->getType());
2470 }
2471
2472 if (Instruction *crossLogicOpFold =
2474 return crossLogicOpFold;
2475 }
2476
2477 // Try to fold into bitreverse if bswap is the root of the expression tree.
2478 if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
2479 /*MatchBitReversals*/ true))
2480 return BitOp;
2481 break;
2482 }
2483 case Intrinsic::masked_load:
2484 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
2485 return replaceInstUsesWith(CI, SimplifiedMaskedOp);
2486 break;
2487 case Intrinsic::masked_store:
2488 return simplifyMaskedStore(*II);
2489 case Intrinsic::masked_gather:
2490 return simplifyMaskedGather(*II);
2491 case Intrinsic::masked_scatter:
2492 return simplifyMaskedScatter(*II);
2493 case Intrinsic::launder_invariant_group:
2494 case Intrinsic::strip_invariant_group:
2495 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
2496 return replaceInstUsesWith(*II, SkippedBarrier);
2497 break;
2498 case Intrinsic::powi: {
2499 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2500 // 0 and 1 are handled in instsimplify
2501 // powi(x, -1) -> 1/x
2502 if (Power->isMinusOne())
2503 return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
2504 II->getArgOperand(0), II);
2505 // powi(x, 2) -> x*x
2506 if (Power->equalsInt(2))
2507 return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
2508 II->getArgOperand(0), II);
2509
2510 if (!Power->getValue()[0]) {
2511 Value *X;
2512 // If power is even:
2513 // powi(-x, p) -> powi(x, p)
2514 // powi(fabs(x), p) -> powi(x, p)
2515 // powi(copysign(x, y), p) -> powi(x, p)
2516 if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
2517 match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
2518 match(II->getArgOperand(0),
2520 return replaceOperand(*II, 0, X);
2521 }
2522 }
2523 if (ConstantFP *Base = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
2524 Value *Exp = II->getArgOperand(1);
2525 Type *Ty = Base->getType();
2526 // powi(2.0, p) -> ldexp(1.0, p)
2527 if (II->hasApproxFunc() && Base->isExactlyValue(2.0)) {
2528 ConstantFP *One = ConstantFP::get(Ty, 1.0);
2529 if (auto *VTy = dyn_cast<VectorType>(Ty))
2530 Exp = Builder.CreateVectorSplat(VTy->getElementCount(), Exp);
2531 Value *Ldexp = Builder.CreateLdexp(One, Exp, II);
2532 return replaceInstUsesWith(*II, Ldexp);
2533 }
2534 }
2535 break;
2536 }
2537
2538 case Intrinsic::cttz:
2539 case Intrinsic::ctlz:
2540 if (auto *I = foldCttzCtlz(*II, *this))
2541 return I;
2542 break;
2543
2544 case Intrinsic::ctpop:
2545 if (auto *I = foldCtpop(*II, *this))
2546 return I;
2547 break;
2548
2549 case Intrinsic::fshl:
2550 case Intrinsic::fshr: {
2551 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
2552 Type *Ty = II->getType();
2553 unsigned BitWidth = Ty->getScalarSizeInBits();
2554 Constant *ShAmtC;
2555 if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
2556 // Canonicalize a shift amount constant operand to modulo the bit-width.
2557 Constant *WidthC = ConstantInt::get(Ty, BitWidth);
2558 Constant *ModuloC =
2559 ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
2560 if (!ModuloC)
2561 return nullptr;
2562 if (ModuloC != ShAmtC)
2563 return replaceOperand(*II, 2, ModuloC);
2564
2566 ShAmtC, DL),
2567 m_One()) &&
2568 "Shift amount expected to be modulo bitwidth");
2569
2570 // Canonicalize funnel shift right by constant to funnel shift left. This
2571 // is not entirely arbitrary. For historical reasons, the backend may
2572 // recognize rotate left patterns but miss rotate right patterns.
2573 if (IID == Intrinsic::fshr) {
2574 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2575 if (!isKnownNonZero(ShAmtC, SQ.getWithInstruction(II)))
2576 return nullptr;
2577
2578 Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
2579 Module *Mod = II->getModule();
2580 Function *Fshl =
2581 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
2582 return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
2583 }
2584 assert(IID == Intrinsic::fshl &&
2585 "All funnel shifts by simple constants should go left");
2586
2587 // fshl(X, 0, C) --> shl X, C
2588 // fshl(X, undef, C) --> shl X, C
2589 if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
2590 return BinaryOperator::CreateShl(Op0, ShAmtC);
2591
2592 // fshl(0, X, C) --> lshr X, (BW-C)
2593 // fshl(undef, X, C) --> lshr X, (BW-C)
2594 if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
2595 return BinaryOperator::CreateLShr(Op1,
2596 ConstantExpr::getSub(WidthC, ShAmtC));
2597
2598 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2599 if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
2600 Module *Mod = II->getModule();
2601 Function *Bswap =
2602 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
2603 return CallInst::Create(Bswap, { Op0 });
2604 }
2605 if (Instruction *BitOp =
2606 matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
2607 /*MatchBitReversals*/ true))
2608 return BitOp;
2609
2610 // R = fshl(X, X, C2)
2611 // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
2612 Value *InnerOp;
2613 const APInt *ShAmtInnerC, *ShAmtOuterC;
2614 if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
2615 m_APInt(ShAmtInnerC))) &&
2616 match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) {
2617 APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
2618 APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
2619 if (Modulo.isZero())
2620 return replaceInstUsesWith(*II, InnerOp);
2621 Constant *ModuloC = ConstantInt::get(Ty, Modulo);
2623 {InnerOp, InnerOp, ModuloC});
2624 }
2625 }
2626
2627 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2628 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2629 // if BitWidth is a power-of-2
2630 Value *Y;
2631 if (Op0 == Op1 && isPowerOf2_32(BitWidth) &&
2632 match(II->getArgOperand(2), m_Neg(m_Value(Y)))) {
2633 Module *Mod = II->getModule();
2635 Mod, IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Ty);
2636 return CallInst::Create(OppositeShift, {Op0, Op1, Y});
2637 }
2638
2639 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2640 // power-of-2
2641 if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
2642 match(Op1, m_ZeroInt())) {
2643 Value *Op2 = II->getArgOperand(2);
2644 Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
2645 return BinaryOperator::CreateShl(Op0, And);
2646 }
2647
2648 // Left or right might be masked.
2650 return &CI;
2651
2652 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2653 // so only the low bits of the shift amount are demanded if the bitwidth is
2654 // a power-of-2.
2655 if (!isPowerOf2_32(BitWidth))
2656 break;
2658 KnownBits Op2Known(BitWidth);
2659 if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
2660 return &CI;
2661 break;
2662 }
2663 case Intrinsic::pdep: {
2664 const APInt *MaskC;
2665 if (match(II->getArgOperand(1), m_APInt(MaskC))) {
2666 unsigned MaskIdx, MaskLen;
2667 if (MaskC->isShiftedMask(MaskIdx, MaskLen)) {
2668 // any single contiguous sequence of 1s anywhere in the mask simply
2669 // describes a subset of the input bits shifted to the appropriate
2670 // position. Replace with the straightforward IR.
2671 Value *Input = II->getArgOperand(0);
2672 Value *ShiftAmt = ConstantInt::get(II->getType(), MaskIdx);
2673 Value *Shifted = Builder.CreateShl(Input, ShiftAmt);
2674 Value *Masked = Builder.CreateAnd(Shifted, II->getArgOperand(1));
2675 return replaceInstUsesWith(*II, Masked);
2676 }
2677 }
2678 break;
2679 }
2680 case Intrinsic::pext: {
2681 const APInt *MaskC;
2682 if (match(II->getArgOperand(1), m_APInt(MaskC))) {
2683 unsigned MaskIdx, MaskLen;
2684 if (MaskC->isShiftedMask(MaskIdx, MaskLen)) {
2685 // any single contiguous sequence of 1s anywhere in the mask simply
2686 // describes a subset of the input bits shifted to the appropriate
2687 // position. Replace with the straightforward IR.
2688 Value *Input = II->getArgOperand(0);
2689 Value *Masked = Builder.CreateAnd(Input, II->getArgOperand(1));
2690 Value *ShiftAmt = ConstantInt::get(II->getType(), MaskIdx);
2691 Value *Shifted = Builder.CreateLShr(Masked, ShiftAmt);
2692 return replaceInstUsesWith(*II, Shifted);
2693 }
2694 }
2695 break;
2696 }
2697 case Intrinsic::ptrmask: {
2698 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2699 KnownBits Known(BitWidth);
2701 return II;
2702
2703 Value *InnerPtr, *InnerMask;
2704 bool Changed = false;
2705 // Combine:
2706 // (ptrmask (ptrmask p, A), B)
2707 // -> (ptrmask p, (and A, B))
2708 if (match(II->getArgOperand(0),
2710 m_Value(InnerMask))))) {
2711 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2712 "Mask types must match");
2713 // TODO: If InnerMask == Op1, we could copy attributes from inner
2714 // callsite -> outer callsite.
2715 Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
2716 replaceOperand(CI, 0, InnerPtr);
2717 replaceOperand(CI, 1, NewMask);
2718 Changed = true;
2719 }
2720
2721 // See if we can deduce non-null.
2722 if (!CI.hasRetAttr(Attribute::NonNull) &&
2723 (Known.isNonZero() ||
2724 isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
2725 CI.addRetAttr(Attribute::NonNull);
2726 Changed = true;
2727 }
2728
2729 unsigned NewAlignmentLog =
2731 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
2732 // Known bits will capture if we had alignment information associated with
2733 // the pointer argument.
2734 if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
2736 CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
2737 Changed = true;
2738 }
2739 if (Changed)
2740 return &CI;
2741 break;
2742 }
2743 case Intrinsic::uadd_with_overflow:
2744 case Intrinsic::sadd_with_overflow: {
2745 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2746 return I;
2747
2748 // Given 2 constant operands whose sum does not overflow:
2749 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2750 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2751 Value *X;
2752 const APInt *C0, *C1;
2753 Value *Arg0 = II->getArgOperand(0);
2754 Value *Arg1 = II->getArgOperand(1);
2755 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2756 bool HasNWAdd = IsSigned
2757 ? match(Arg0, m_NSWAddLike(m_Value(X), m_APInt(C0)))
2758 : match(Arg0, m_NUWAddLike(m_Value(X), m_APInt(C0)));
2759 if (HasNWAdd && match(Arg1, m_APInt(C1))) {
2760 bool Overflow;
2761 APInt NewC =
2762 IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
2763 if (!Overflow)
2764 return replaceInstUsesWith(
2765 *II, Builder.CreateBinaryIntrinsic(
2766 IID, X, ConstantInt::get(Arg1->getType(), NewC)));
2767 }
2768 break;
2769 }
2770
2771 case Intrinsic::umul_with_overflow:
2772 case Intrinsic::smul_with_overflow:
2773 case Intrinsic::usub_with_overflow:
2774 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2775 return I;
2776 break;
2777
2778 case Intrinsic::ssub_with_overflow: {
2779 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2780 return I;
2781
2782 Constant *C;
2783 Value *Arg0 = II->getArgOperand(0);
2784 Value *Arg1 = II->getArgOperand(1);
2785 // Given a constant C that is not the minimum signed value
2786 // for an integer of a given bit width:
2787 //
2788 // ssubo X, C -> saddo X, -C
2789 if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
2790 Value *NegVal = ConstantExpr::getNeg(C);
2791 // Build a saddo call that is equivalent to the discovered
2792 // ssubo call.
2793 return replaceInstUsesWith(
2794 *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2795 Arg0, NegVal));
2796 }
2797
2798 break;
2799 }
2800
2801 case Intrinsic::uadd_sat:
2802 case Intrinsic::sadd_sat:
2803 case Intrinsic::usub_sat:
2804 case Intrinsic::ssub_sat: {
2806 Type *Ty = SI->getType();
2807 Value *Arg0 = SI->getLHS();
2808 Value *Arg1 = SI->getRHS();
2809
2810 // Make use of known overflow information.
2811 OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
2812 Arg0, Arg1, SI);
2813 switch (OR) {
2815 break;
2817 if (SI->isSigned())
2818 return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
2819 else
2820 return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
2822 unsigned BitWidth = Ty->getScalarSizeInBits();
2823 APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
2824 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
2825 }
2827 unsigned BitWidth = Ty->getScalarSizeInBits();
2828 APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
2829 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
2830 }
2831 }
2832
2833 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2834 // which after that:
2835 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2836 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
2837 Constant *C, *C1;
2838 Value *A;
2839 if (IID == Intrinsic::usub_sat &&
2840 match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
2841 match(Arg1, m_ImmConstant(C1))) {
2842 auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
2843 auto *NewSub =
2844 Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
2845 return replaceInstUsesWith(*SI, NewSub);
2846 }
2847
2848 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2849 if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2850 C->isNotMinSignedValue()) {
2851 Value *NegVal = ConstantExpr::getNeg(C);
2852 return replaceInstUsesWith(
2853 *II, Builder.CreateBinaryIntrinsic(
2854 Intrinsic::sadd_sat, Arg0, NegVal));
2855 }
2856
2857 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2858 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2859 // if Val and Val2 have the same sign
2860 if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
2861 Value *X;
2862 const APInt *Val, *Val2;
2863 APInt NewVal;
2864 bool IsUnsigned =
2865 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2866 if (Other->getIntrinsicID() == IID &&
2867 match(Arg1, m_APInt(Val)) &&
2868 match(Other->getArgOperand(0), m_Value(X)) &&
2869 match(Other->getArgOperand(1), m_APInt(Val2))) {
2870 if (IsUnsigned)
2871 NewVal = Val->uadd_sat(*Val2);
2872 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2873 bool Overflow;
2874 NewVal = Val->sadd_ov(*Val2, Overflow);
2875 if (Overflow) {
2876 // Both adds together may add more than SignedMaxValue
2877 // without saturating the final result.
2878 break;
2879 }
2880 } else {
2881 // Cannot fold saturated addition with different signs.
2882 break;
2883 }
2884
2885 return replaceInstUsesWith(
2886 *II, Builder.CreateBinaryIntrinsic(
2887 IID, X, ConstantInt::get(II->getType(), NewVal)));
2888 }
2889 }
2890 break;
2891 }
2892
2893 case Intrinsic::minnum:
2894 case Intrinsic::maxnum:
2895 case Intrinsic::minimumnum:
2896 case Intrinsic::maximumnum:
2897 case Intrinsic::minimum:
2898 case Intrinsic::maximum: {
2899 Value *Arg0 = II->getArgOperand(0);
2900 Value *Arg1 = II->getArgOperand(1);
2901 Value *X, *Y;
2902 if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2903 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2904 // If both operands are negated, invert the call and negate the result:
2905 // min(-X, -Y) --> -(max(X, Y))
2906 // max(-X, -Y) --> -(min(X, Y))
2907 Intrinsic::ID NewIID;
2908 switch (IID) {
2909 case Intrinsic::maxnum:
2910 NewIID = Intrinsic::minnum;
2911 break;
2912 case Intrinsic::minnum:
2913 NewIID = Intrinsic::maxnum;
2914 break;
2915 case Intrinsic::maximumnum:
2916 NewIID = Intrinsic::minimumnum;
2917 break;
2918 case Intrinsic::minimumnum:
2919 NewIID = Intrinsic::maximumnum;
2920 break;
2921 case Intrinsic::maximum:
2922 NewIID = Intrinsic::minimum;
2923 break;
2924 case Intrinsic::minimum:
2925 NewIID = Intrinsic::maximum;
2926 break;
2927 default:
2928 llvm_unreachable("unexpected intrinsic ID");
2929 }
2930 Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
2931 Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
2932 FNeg->copyIRFlags(II);
2933 return FNeg;
2934 }
2935
2936 // m(m(X, C2), C1) -> m(X, C)
2937 const APFloat *C1, *C2;
2938 if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
2939 if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
2940 ((match(M->getArgOperand(0), m_Value(X)) &&
2941 match(M->getArgOperand(1), m_APFloat(C2))) ||
2942 (match(M->getArgOperand(1), m_Value(X)) &&
2943 match(M->getArgOperand(0), m_APFloat(C2))))) {
2944 APFloat Res(0.0);
2945 switch (IID) {
2946 case Intrinsic::maxnum:
2947 Res = maxnum(*C1, *C2);
2948 break;
2949 case Intrinsic::minnum:
2950 Res = minnum(*C1, *C2);
2951 break;
2952 case Intrinsic::maximumnum:
2953 Res = maximumnum(*C1, *C2);
2954 break;
2955 case Intrinsic::minimumnum:
2956 Res = minimumnum(*C1, *C2);
2957 break;
2958 case Intrinsic::maximum:
2959 Res = maximum(*C1, *C2);
2960 break;
2961 case Intrinsic::minimum:
2962 Res = minimum(*C1, *C2);
2963 break;
2964 default:
2965 llvm_unreachable("unexpected intrinsic ID");
2966 }
2967 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2968 // was a simplification (so Arg0 and its original flags could
2969 // propagate?)
2970 Value *V = Builder.CreateBinaryIntrinsic(
2971 IID, X, ConstantFP::get(Arg0->getType(), Res),
2973 return replaceInstUsesWith(*II, V);
2974 }
2975 }
2976
2977 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2978 if (match(Arg0, m_FPExt(m_Value(X))) && match(Arg1, m_FPExt(m_Value(Y))) &&
2979 (Arg0->hasOneUse() || Arg1->hasOneUse()) &&
2980 X->getType() == Y->getType()) {
2981 Value *NewCall =
2982 Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
2983 return new FPExtInst(NewCall, II->getType());
2984 }
2985
2986 // m(fpext X, C) -> fpext m(X, TruncC) if C can be losslessly truncated.
2987 Constant *C;
2988 if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
2989 match(Arg1, m_ImmConstant(C))) {
2990 if (Constant *TruncC =
2991 getLosslessInvCast(C, X->getType(), Instruction::FPExt, DL)) {
2992 Value *NewCall =
2993 Builder.CreateBinaryIntrinsic(IID, X, TruncC, II, II->getName());
2994 return new FPExtInst(NewCall, II->getType());
2995 }
2996 }
2997
2998 // max X, -X --> fabs X
2999 // min X, -X --> -(fabs X)
3000 // TODO: Remove one-use limitation? That is obviously better for max,
3001 // hence why we don't check for one-use for that. However,
3002 // it would be an extra instruction for min (fnabs), but
3003 // that is still likely better for analysis and codegen.
3004 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
3005 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Specific(X)))
3006 return Op0->hasOneUse() ||
3007 (IID != Intrinsic::minimum && IID != Intrinsic::minnum &&
3008 IID != Intrinsic::minimumnum);
3009 return false;
3010 };
3011
3012 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
3013 Value *R = Builder.CreateFAbs(X, II);
3014 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
3015 IID == Intrinsic::minimumnum)
3016 R = Builder.CreateFNegFMF(R, II);
3017 return replaceInstUsesWith(*II, R);
3018 }
3019
3020 break;
3021 }
3022 case Intrinsic::matrix_multiply: {
3023 // Optimize negation in matrix multiplication.
3024
3025 // -A * -B -> A * B
3026 Value *A, *B;
3027 if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
3028 match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
3029 replaceOperand(*II, 0, A);
3030 replaceOperand(*II, 1, B);
3031 return II;
3032 }
3033
3034 Value *Op0 = II->getOperand(0);
3035 Value *Op1 = II->getOperand(1);
3036 Value *OpNotNeg, *NegatedOp;
3037 unsigned NegatedOpArg, OtherOpArg;
3038 if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
3039 NegatedOp = Op0;
3040 NegatedOpArg = 0;
3041 OtherOpArg = 1;
3042 } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
3043 NegatedOp = Op1;
3044 NegatedOpArg = 1;
3045 OtherOpArg = 0;
3046 } else
3047 // Multiplication doesn't have a negated operand.
3048 break;
3049
3050 // Only optimize if the negated operand has only one use.
3051 if (!NegatedOp->hasOneUse())
3052 break;
3053
3054 Value *OtherOp = II->getOperand(OtherOpArg);
3055 VectorType *RetTy = cast<VectorType>(II->getType());
3056 VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
3057 VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
3058 ElementCount NegatedCount = NegatedOpTy->getElementCount();
3059 ElementCount OtherCount = OtherOpTy->getElementCount();
3060 ElementCount RetCount = RetTy->getElementCount();
3061 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
3062 if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
3063 ElementCount::isKnownLT(OtherCount, RetCount)) {
3064 Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
3065 replaceOperand(*II, NegatedOpArg, OpNotNeg);
3066 replaceOperand(*II, OtherOpArg, InverseOtherOp);
3067 return II;
3068 }
3069 // (-A) * B -> -(A * B), if it is cheaper to negate the result
3070 if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
3071 SmallVector<Value *, 5> NewArgs(II->args());
3072 NewArgs[NegatedOpArg] = OpNotNeg;
3073 Value *NewMul = Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
3074 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
3075 }
3076 break;
3077 }
3078 case Intrinsic::fmuladd: {
3079 // Try to simplify the underlying FMul.
3080 if (Value *V =
3081 simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
3082 II->getFastMathFlags(), SQ.getWithInstruction(II)))
3083 return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
3084 II->getFastMathFlags());
3085
3086 [[fallthrough]];
3087 }
3088 case Intrinsic::fma: {
3089 // fma fneg(x), fneg(y), z -> fma x, y, z
3090 Value *Src0 = II->getArgOperand(0);
3091 Value *Src1 = II->getArgOperand(1);
3092 Value *Src2 = II->getArgOperand(2);
3093 Value *X, *Y;
3094 if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
3095 replaceOperand(*II, 0, X);
3096 replaceOperand(*II, 1, Y);
3097 return II;
3098 }
3099
3100 // fma fabs(x), fabs(x), z -> fma x, x, z
3101 if (match(Src0, m_FAbs(m_Value(X))) &&
3102 match(Src1, m_FAbs(m_Specific(X)))) {
3103 replaceOperand(*II, 0, X);
3104 replaceOperand(*II, 1, X);
3105 return II;
3106 }
3107
3108 // Try to simplify the underlying FMul. We can only apply simplifications
3109 // that do not require rounding.
3110 if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
3111 SQ.getWithInstruction(II)))
3112 return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
3113
3114 // fma x, y, 0 -> fmul x, y
3115 // This is always valid for -0.0, but requires nsz for +0.0 as
3116 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
3117 if (match(Src2, m_NegZeroFP()) ||
3118 (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
3119 return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
3120
3121 // fma x, -1.0, y -> fsub y, x
3122 if (match(Src1, m_SpecificFP(-1.0)))
3123 return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
3124
3125 break;
3126 }
3127 case Intrinsic::copysign: {
3128 Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
3129 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
3130 Sign, getSimplifyQuery().getWithInstruction(II))) {
3131 if (*KnownSignBit) {
3132 // If we know that the sign argument is negative, reduce to FNABS:
3133 // copysign Mag, -Sign --> fneg (fabs Mag)
3134 Value *Fabs = Builder.CreateFAbs(Mag, II);
3135 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
3136 }
3137
3138 // If we know that the sign argument is positive, reduce to FABS:
3139 // copysign Mag, +Sign --> fabs Mag
3140 Value *Fabs = Builder.CreateFAbs(Mag, II);
3141 return replaceInstUsesWith(*II, Fabs);
3142 }
3143
3144 // Propagate sign argument through nested calls:
3145 // copysign Mag, (copysign ?, X) --> copysign Mag, X
3146 Value *X;
3148 Value *CopySign =
3149 Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
3150 return replaceInstUsesWith(*II, CopySign);
3151 }
3152
3153 // Clear sign-bit of constant magnitude:
3154 // copysign -MagC, X --> copysign MagC, X
3155 // TODO: Support constant folding for fabs
3156 const APFloat *MagC;
3157 if (match(Mag, m_APFloat(MagC)) && MagC->isNegative()) {
3158 APFloat PosMagC = *MagC;
3159 PosMagC.clearSign();
3160 return replaceOperand(*II, 0, ConstantFP::get(Mag->getType(), PosMagC));
3161 }
3162
3163 // Peek through changes of magnitude's sign-bit. This call rewrites those:
3164 // copysign (fabs X), Sign --> copysign X, Sign
3165 // copysign (fneg X), Sign --> copysign X, Sign
3166 if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
3167 return replaceOperand(*II, 0, X);
3168
3169 // copysign(floor(fabs(X)), X) --> copysign(trunc(X), X)
3170 // copysign ignores the sign bit of its magnitude argument (implicit fabs),
3171 // so replacing floor(fabs(X)) with trunc(X) is correct for all inputs
3172 // including NaN without requiring nnan. The m_FAbs match also ensures
3173 // the floor argument is non-negative, so floor == trunc.
3174 Value *FAbsArg;
3175 if (match(Mag, m_Intrinsic<Intrinsic::floor>(m_FAbs(m_Value(FAbsArg)))) &&
3176 FAbsArg == Sign) {
3177 Value *Trunc = Builder.CreateUnaryIntrinsic(Intrinsic::trunc, Sign, II);
3178 return replaceOperand(*II, 0, Trunc);
3179 }
3180
3181 Type *SignEltTy = Sign->getType()->getScalarType();
3182
3183 Value *CastSrc;
3184 if (match(Sign,
3186 CastSrc->getType()->isIntOrIntVectorTy() &&
3188 KnownBits Known(SignEltTy->getPrimitiveSizeInBits());
3190 APInt::getSignMask(Known.getBitWidth()), Known,
3191 SQ))
3192 return II;
3193 }
3194
3195 break;
3196 }
3197 case Intrinsic::fabs: {
3198 Value *Cond, *TVal, *FVal;
3199 Value *Arg = II->getArgOperand(0);
3200 Value *X;
3201 // fabs (-X) --> fabs (X)
3202 if (match(Arg, m_FNeg(m_Value(X)))) {
3203 Value *Fabs = Builder.CreateFAbs(X, II);
3204 return replaceInstUsesWith(CI, Fabs);
3205 }
3206
3207 if (match(Arg, m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
3208 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
3209 if (Arg->hasOneUse() ? (isa<Constant>(TVal) || isa<Constant>(FVal))
3210 : (isa<Constant>(TVal) && isa<Constant>(FVal))) {
3211 CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
3212 CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
3213 SelectInst *SI = SelectInst::Create(Cond, AbsT, AbsF);
3214 SI->setFastMathFlags(II->getFastMathFlags() |
3215 cast<SelectInst>(Arg)->getFastMathFlags());
3216 // Can't copy nsz to select, as even with the nsz flag the fabs result
3217 // always has the sign bit unset.
3218 SI->setHasNoSignedZeros(false);
3219 return SI;
3220 }
3221 // fabs (select Cond, -FVal, FVal) --> fabs FVal
3222 if (match(TVal, m_FNeg(m_Specific(FVal))))
3223 return replaceOperand(*II, 0, FVal);
3224 // fabs (select Cond, TVal, -TVal) --> fabs TVal
3225 if (match(FVal, m_FNeg(m_Specific(TVal))))
3226 return replaceOperand(*II, 0, TVal);
3227 }
3228
3229 Value *Magnitude, *Sign;
3230 if (match(II->getArgOperand(0),
3231 m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
3232 // fabs (copysign x, y) -> (fabs x)
3233 Value *AbsSign = Builder.CreateFAbs(Magnitude, II);
3234 return replaceInstUsesWith(*II, AbsSign);
3235 }
3236
3237 [[fallthrough]];
3238 }
3239 case Intrinsic::ceil:
3240 case Intrinsic::floor:
3241 case Intrinsic::round:
3242 case Intrinsic::roundeven:
3243 case Intrinsic::nearbyint:
3244 case Intrinsic::rint:
3245 case Intrinsic::trunc: {
3246 Value *ExtSrc;
3247 if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
3248 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
3249 Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
3250 return new FPExtInst(NarrowII, II->getType());
3251 }
3252 break;
3253 }
3254 case Intrinsic::cos:
3255 case Intrinsic::amdgcn_cos:
3256 case Intrinsic::cosh: {
3257 Value *X, *Sign;
3258 Value *Src = II->getArgOperand(0);
3259 if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X))) ||
3260 match(Src, m_CopySign(m_Value(X), m_Value(Sign)))) {
3261 // f(-x) --> f(x)
3262 // f(fabs(x)) --> f(x)
3263 // f(copysign(x, y)) --> f(x)
3264 // for f in {cos, cosh}
3265 return replaceOperand(*II, 0, X);
3266 }
3267 break;
3268 }
3269 case Intrinsic::sin:
3270 case Intrinsic::amdgcn_sin:
3271 case Intrinsic::sinh:
3272 case Intrinsic::tan:
3273 case Intrinsic::tanh: {
3274 Value *X;
3275 if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
3276 // f(-x) --> -f(x)
3277 // for f in {sin, sinh, tan, tanh}
3278 Value *NewFunc = Builder.CreateUnaryIntrinsic(IID, X, II);
3279 return UnaryOperator::CreateFNegFMF(NewFunc, II);
3280 }
3281 break;
3282 }
3283 case Intrinsic::ldexp: {
3284 Value *Src = II->getArgOperand(0);
3285 Value *Exp = II->getArgOperand(1);
3286
3287 // ldexp(x, K) -> fmul x, 2^K
3288 uint64_t ConstExp;
3289 if (match(Exp, m_ConstantInt(ConstExp))) {
3290 const fltSemantics &FPTy =
3291 Src->getType()->getScalarType()->getFltSemantics();
3292
3293 APFloat Scaled = scalbn(APFloat::getOne(FPTy), static_cast<int>(ConstExp),
3295 if (!Scaled.isZero() && !Scaled.isInfinity()) {
3296 // Skip overflow and underflow cases.
3297 Constant *FPConst = ConstantFP::get(Src->getType(), Scaled);
3298 return BinaryOperator::CreateFMulFMF(Src, FPConst, II);
3299 }
3300 }
3301
3302 // ldexp(ldexp(x, a), b) -> ldexp(x, sadd.sat(a, b))
3303 //
3304 // A danger is if the first ldexp would overflow to infinity or underflow to
3305 // zero, but the combined exponent avoids it.
3306 //
3307 // We ignore this with reassoc, or if we know both exponents have the same
3308 // sign (since then we'd just double down on the over/underflow which would
3309 // occur anyway).
3310 //
3311 // ldexp can take arbitrary integer types, so we also need to ensure that
3312 // our exponent type is wide enough so that if sadd.sat(a, b) saturates,
3313 // then ldexp at the saturated exponent saturates to inf or zero as well.
3314 //
3315 // TODO: Could do better if we had range tracking for the input value
3316 // exponent. Also could broaden sign check to cover == 0 case.
3317 Value *InnerSrc;
3318 Value *InnerExp;
3320 m_Value(InnerSrc), m_Value(InnerExp)))) &&
3321 Exp->getType() == InnerExp->getType()) {
3322 FastMathFlags FMF = II->getFastMathFlags();
3323 FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
3324
3325 if (ldexpSaturatingAddIsSafe(II->getType(), Exp->getType()) &&
3326 ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
3327 signBitMustBeTheSame(Exp, InnerExp, SQ.getWithInstruction(II)))) {
3328 Value *NewExp =
3329 Builder.CreateBinaryIntrinsic(Intrinsic::sadd_sat, InnerExp, Exp);
3330 II->setArgOperand(1, NewExp);
3331 II->setFastMathFlags(InnerFlags); // Or the inner flags.
3332 return replaceOperand(*II, 0, InnerSrc);
3333 }
3334 }
3335
3336 // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3337 // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
3338 Value *ExtSrc;
3339 if (match(Exp, m_ZExt(m_Value(ExtSrc))) &&
3340 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3341 Value *Select =
3342 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0),
3343 ConstantFP::get(II->getType(), 1.0));
3345 }
3346 if (match(Exp, m_SExt(m_Value(ExtSrc))) &&
3347 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3348 Value *Select =
3349 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5),
3350 ConstantFP::get(II->getType(), 1.0));
3352 }
3353
3354 // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3355 // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3356 //
3357 // TODO: If we cared, should insert a canonicalize for x
3358 Value *SelectCond, *SelectLHS, *SelectRHS;
3359 if (match(II->getArgOperand(1),
3360 m_OneUse(m_Select(m_Value(SelectCond), m_Value(SelectLHS),
3361 m_Value(SelectRHS))))) {
3362 Value *NewLdexp = nullptr;
3363 Value *Select = nullptr;
3364 if (match(SelectRHS, m_ZeroInt())) {
3365 NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
3366 Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
3367 } else if (match(SelectLHS, m_ZeroInt())) {
3368 NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
3369 Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
3370 }
3371
3372 if (NewLdexp) {
3373 Select->takeName(II);
3374 return replaceInstUsesWith(*II, Select);
3375 }
3376 }
3377
3378 break;
3379 }
3380 case Intrinsic::ptrauth_auth:
3381 case Intrinsic::ptrauth_resign: {
3382 // (sign|resign) + (auth|resign) can be folded by omitting the middle
3383 // sign+auth component if the key and discriminator match.
3384 bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3385 Value *Ptr = II->getArgOperand(0);
3386 Value *Key = II->getArgOperand(1);
3387 Value *Disc = II->getArgOperand(2);
3388 Value *DS = nullptr;
3389 if (auto Bundle = II->getOperandBundle(LLVMContext::OB_deactivation_symbol))
3390 DS = Bundle->Inputs[0];
3391
3392 // AuthKey will be the key we need to end up authenticating against in
3393 // whatever we replace this sequence with.
3394 Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
3395 if (const auto *CI = dyn_cast<CallBase>(Ptr)) {
3396 Value *OtherDS = nullptr;
3397 if (auto Bundle =
3399 OtherDS = Bundle->Inputs[0];
3400 if (DS != OtherDS)
3401 break;
3402
3403 if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3404 if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
3405 break;
3406 } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3407 // The resign intrinsic does not support deactivation symbols.
3408 assert(!DS);
3409 if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
3410 break;
3411 AuthKey = CI->getArgOperand(1);
3412 AuthDisc = CI->getArgOperand(2);
3413 } else
3414 break;
3415 BasePtr = CI->getArgOperand(0);
3416 } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Ptr)) {
3417 // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3418 // our purposes, so check for that too.
3419 const auto *CPA = dyn_cast<ConstantPtrAuth>(PtrToInt->getOperand(0));
3420 if (!CPA || DS || !CPA->isKnownCompatibleWith(Key, Disc, DL))
3421 break;
3422
3423 // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3424 if (NeedSign && isa<ConstantInt>(II->getArgOperand(4))) {
3425 auto *SignKey = cast<ConstantInt>(II->getArgOperand(3));
3426 auto *SignDisc = cast<ConstantInt>(II->getArgOperand(4));
3427 auto *Null = ConstantPointerNull::get(Builder.getPtrTy());
3428 auto *NewCPA = ConstantPtrAuth::get(CPA->getPointer(), SignKey,
3429 SignDisc, /*AddrDisc=*/Null,
3430 /*DeactivationSymbol=*/Null);
3432 *II, ConstantExpr::getPointerCast(NewCPA, II->getType()));
3433 return eraseInstFromFunction(*II);
3434 }
3435
3436 // auth(ptrauth(p,k,d),k,d) -> p
3437 BasePtr = Builder.CreatePtrToInt(CPA->getPointer(), II->getType());
3438 } else
3439 break;
3440
3441 unsigned NewIntrin;
3442 if (AuthKey && NeedSign) {
3443 // resign(0,1) + resign(1,2) = resign(0, 2)
3444 NewIntrin = Intrinsic::ptrauth_resign;
3445 } else if (AuthKey) {
3446 // resign(0,1) + auth(1) = auth(0)
3447 NewIntrin = Intrinsic::ptrauth_auth;
3448 } else if (NeedSign) {
3449 // sign(0) + resign(0, 1) = sign(1)
3450 NewIntrin = Intrinsic::ptrauth_sign;
3451 } else {
3452 // sign(0) + auth(0) = nop
3453 replaceInstUsesWith(*II, BasePtr);
3454 return eraseInstFromFunction(*II);
3455 }
3456
3457 SmallVector<Value *, 4> CallArgs;
3458 CallArgs.push_back(BasePtr);
3459 if (AuthKey) {
3460 CallArgs.push_back(AuthKey);
3461 CallArgs.push_back(AuthDisc);
3462 }
3463
3464 if (NeedSign) {
3465 CallArgs.push_back(II->getArgOperand(3));
3466 CallArgs.push_back(II->getArgOperand(4));
3467 }
3468
3469 std::vector<OperandBundleDef> Bundles;
3470 if (DS)
3471 Bundles.push_back(OperandBundleDef("deactivation-symbol", DS));
3472
3473 Function *NewFn =
3474 Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
3475 return CallInst::Create(NewFn, CallArgs, Bundles);
3476 }
3477 case Intrinsic::arm_neon_vtbl1:
3478 case Intrinsic::arm_neon_vtbl2:
3479 case Intrinsic::arm_neon_vtbl3:
3480 case Intrinsic::arm_neon_vtbl4:
3481 case Intrinsic::aarch64_neon_tbl1:
3482 case Intrinsic::aarch64_neon_tbl2:
3483 case Intrinsic::aarch64_neon_tbl3:
3484 case Intrinsic::aarch64_neon_tbl4:
3485 return simplifyNeonTbl(*II, *this, /*IsExtension=*/false);
3486 case Intrinsic::arm_neon_vtbx1:
3487 case Intrinsic::arm_neon_vtbx2:
3488 case Intrinsic::arm_neon_vtbx3:
3489 case Intrinsic::arm_neon_vtbx4:
3490 case Intrinsic::aarch64_neon_tbx1:
3491 case Intrinsic::aarch64_neon_tbx2:
3492 case Intrinsic::aarch64_neon_tbx3:
3493 case Intrinsic::aarch64_neon_tbx4:
3494 return simplifyNeonTbl(*II, *this, /*IsExtension=*/true);
3495
3496 case Intrinsic::arm_neon_vmulls:
3497 case Intrinsic::arm_neon_vmullu:
3498 case Intrinsic::aarch64_neon_smull:
3499 case Intrinsic::aarch64_neon_umull: {
3500 Value *Arg0 = II->getArgOperand(0);
3501 Value *Arg1 = II->getArgOperand(1);
3502
3503 // Handle mul by zero first:
3505 return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3506 }
3507
3508 // Check for constant LHS & RHS - in this case we just simplify.
3509 bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
3510 IID == Intrinsic::aarch64_neon_umull);
3511 VectorType *NewVT = cast<VectorType>(II->getType());
3512 if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3513 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3514 Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
3515 Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
3516 return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
3517 }
3518
3519 // Couldn't simplify - canonicalize constant to the RHS.
3520 std::swap(Arg0, Arg1);
3521 }
3522
3523 // Handle mul by one:
3524 if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3525 if (ConstantInt *Splat =
3526 dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3527 if (Splat->isOne())
3528 return CastInst::CreateIntegerCast(Arg0, II->getType(),
3529 /*isSigned=*/!Zext);
3530
3531 break;
3532 }
3533 case Intrinsic::arm_neon_aesd:
3534 case Intrinsic::arm_neon_aese:
3535 case Intrinsic::aarch64_crypto_aesd:
3536 case Intrinsic::aarch64_crypto_aese:
3537 case Intrinsic::aarch64_sve_aesd:
3538 case Intrinsic::aarch64_sve_aese: {
3539 Value *DataArg = II->getArgOperand(0);
3540 Value *KeyArg = II->getArgOperand(1);
3541
3542 // Accept zero on either operand.
3543 if (!match(KeyArg, m_ZeroInt()))
3544 std::swap(KeyArg, DataArg);
3545
3546 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
3547 Value *Data, *Key;
3548 if (match(KeyArg, m_ZeroInt()) &&
3549 match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3550 replaceOperand(*II, 0, Data);
3551 replaceOperand(*II, 1, Key);
3552 return II;
3553 }
3554 break;
3555 }
3556 case Intrinsic::arm_neon_vshifts:
3557 case Intrinsic::arm_neon_vshiftu:
3558 case Intrinsic::aarch64_neon_sshl:
3559 case Intrinsic::aarch64_neon_ushl:
3560 return foldNeonShift(II, *this);
3561 case Intrinsic::hexagon_V6_vandvrt:
3562 case Intrinsic::hexagon_V6_vandvrt_128B: {
3563 // Simplify Q -> V -> Q conversion.
3564 if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3565 Intrinsic::ID ID0 = Op0->getIntrinsicID();
3566 if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3567 ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3568 break;
3569 Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
3570 uint64_t Bytes1 = computeKnownBits(Bytes, Op0).One.getZExtValue();
3571 uint64_t Mask1 = computeKnownBits(Mask, II).One.getZExtValue();
3572 // Check if every byte has common bits in Bytes and Mask.
3573 uint64_t C = Bytes1 & Mask1;
3574 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
3575 return replaceInstUsesWith(*II, Op0->getArgOperand(0));
3576 }
3577 break;
3578 }
3579 case Intrinsic::stackrestore: {
3580 enum class ClassifyResult {
3581 None,
3582 Alloca,
3583 StackRestore,
3584 CallWithSideEffects,
3585 };
3586 auto Classify = [](const Instruction *I) {
3587 if (isa<AllocaInst>(I))
3588 return ClassifyResult::Alloca;
3589
3590 if (auto *CI = dyn_cast<CallInst>(I)) {
3591 if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
3592 if (II->getIntrinsicID() == Intrinsic::stackrestore)
3593 return ClassifyResult::StackRestore;
3594
3595 if (II->mayHaveSideEffects())
3596 return ClassifyResult::CallWithSideEffects;
3597 } else {
3598 // Consider all non-intrinsic calls to be side effects
3599 return ClassifyResult::CallWithSideEffects;
3600 }
3601 }
3602
3603 return ClassifyResult::None;
3604 };
3605
3606 // If the stacksave and the stackrestore are in the same BB, and there is
3607 // no intervening call, alloca, or stackrestore of a different stacksave,
3608 // remove the restore. This can happen when variable allocas are DCE'd.
3609 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3610 if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3611 SS->getParent() == II->getParent()) {
3612 BasicBlock::iterator BI(SS);
3613 bool CannotRemove = false;
3614 for (++BI; &*BI != II; ++BI) {
3615 switch (Classify(&*BI)) {
3616 case ClassifyResult::None:
3617 // So far so good, look at next instructions.
3618 break;
3619
3620 case ClassifyResult::StackRestore:
3621 // If we found an intervening stackrestore for a different
3622 // stacksave, we can't remove the stackrestore. Otherwise, continue.
3623 if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
3624 CannotRemove = true;
3625 break;
3626
3627 case ClassifyResult::Alloca:
3628 case ClassifyResult::CallWithSideEffects:
3629 // If we found an alloca, a non-intrinsic call, or an intrinsic
3630 // call with side effects, we can't remove the stackrestore.
3631 CannotRemove = true;
3632 break;
3633 }
3634 if (CannotRemove)
3635 break;
3636 }
3637
3638 if (!CannotRemove)
3639 return eraseInstFromFunction(CI);
3640 }
3641 }
3642
3643 // Scan down this block to see if there is another stack restore in the
3644 // same block without an intervening call/alloca.
3646 Instruction *TI = II->getParent()->getTerminator();
3647 bool CannotRemove = false;
3648 for (++BI; &*BI != TI; ++BI) {
3649 switch (Classify(&*BI)) {
3650 case ClassifyResult::None:
3651 // So far so good, look at next instructions.
3652 break;
3653
3654 case ClassifyResult::StackRestore:
3655 // If there is a stackrestore below this one, remove this one.
3656 return eraseInstFromFunction(CI);
3657
3658 case ClassifyResult::Alloca:
3659 case ClassifyResult::CallWithSideEffects:
3660 // If we found an alloca, a non-intrinsic call, or an intrinsic call
3661 // with side effects (such as llvm.stacksave and llvm.read_register),
3662 // we can't remove the stack restore.
3663 CannotRemove = true;
3664 break;
3665 }
3666 if (CannotRemove)
3667 break;
3668 }
3669
3670 // If the stack restore is in a return, resume, or unwind block and if there
3671 // are no allocas or calls between the restore and the return, nuke the
3672 // restore.
3673 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3674 return eraseInstFromFunction(CI);
3675 break;
3676 }
3677 case Intrinsic::lifetime_end:
3678 // Asan needs to poison memory to detect invalid access which is possible
3679 // even for empty lifetime range.
3680 if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3681 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
3682 II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress) ||
3683 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag))
3684 break;
3685
3686 if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
3687 return I.getIntrinsicID() == Intrinsic::lifetime_start;
3688 }))
3689 return nullptr;
3690 break;
3691 case Intrinsic::assume: {
3692 for (auto [Idx, OBU] : llvm::enumerate(II->operand_bundles())) {
3693 auto RemoveBundle = [&, Idx = Idx]() -> Instruction * {
3694 if (II->getNumOperandBundles() == 1)
3695 return eraseInstFromFunction(*II);
3697 };
3698
3699 switch (getBundleAttrFromOBU(OBU)) {
3700 case BundleAttr::None:
3701 llvm_unreachable("Unexpected Attribute");
3702 case BundleAttr::Align: {
3703 // Try to remove redundant alignment assumptions.
3704 auto [Ptr, _, Alignment, Offset] = getAssumeAlignInfo(OBU);
3705
3706 if (!Alignment || !Offset)
3707 break;
3708
3709 // Remove align 1 and non-power-of-two bundles; they don't add any
3710 // useful information.
3711 if (*Alignment == 1 || !isPowerOf2_64(*Alignment))
3712 return RemoveBundle();
3713
3714 if (auto *GEP = dyn_cast<GEPOperator>(Ptr);
3715 GEP &&
3716 GEP->getMaxPreservedAlignment(getDataLayout()) >= *Alignment) {
3717 Builder.CreateAlignmentAssumption(
3718 getDataLayout(), GEP->getPointerOperand(), *Alignment,
3719 *Offset == 0 ? nullptr : Builder.getInt64(*Offset));
3720 return RemoveBundle();
3721 }
3722
3723 Value *BasePtr;
3724 const APInt *PtrOffset;
3725 if (match(Ptr.get(), m_PtrAdd(m_Value(BasePtr), m_APInt(PtrOffset)))) {
3726 auto PtrOffsetVal =
3727 PtrOffset->sextOrTrunc(DL.getIndexTypeSizeInBits(Ptr->getType()))
3728 .trySExtValue();
3729 if (!PtrOffsetVal)
3730 break;
3731 Builder.CreateAlignmentAssumption(
3732 DL, BasePtr, *Alignment,
3733 Builder.getInt64(*Offset - *PtrOffsetVal));
3734 return RemoveBundle();
3735 }
3736
3737 // Don't try to remove align assumptions for pointers derived from
3738 // arguments. We might lose information if the function gets inlined and
3739 // the align argument attribute disappears.
3740 Value *UO = getUnderlyingObject(Ptr);
3741 if (!UO || isa<Argument>(UO))
3742 break;
3743
3744 // Compute known bits for the pointer and drop the assume if the
3745 // known alignment isn't increased by it.
3746 auto AlignMask = (*Alignment - 1);
3747 if (KnownBits KB = computeKnownBits(Ptr, II);
3748 (KB.Zero & AlignMask) == (~*Offset & AlignMask) &&
3749 (KB.One & AlignMask) == (*Offset & AlignMask))
3750 return RemoveBundle();
3751 break;
3752 }
3753
3754 case BundleAttr::Dereferenceable: {
3755 auto [Ptr, _, Count] = getAssumeDereferenceableInfo(OBU);
3756
3757 if (!Count)
3758 break;
3759
3760 if (*Count == 0 ||
3762 getSimplifyQuery().getWithInstruction(II)))
3763 return RemoveBundle();
3764
3765 break;
3766 }
3767
3768 case BundleAttr::Ignore:
3769 return RemoveBundle();
3770
3771 case BundleAttr::NonNull: {
3772 auto [Ptr] = llvm::getAssumeNonNullInfo(OBU);
3773
3774 // Drop assume if we can prove nonnull without it
3775 if (isKnownNonZero(Ptr, getSimplifyQuery().getWithInstruction(II)))
3776 return RemoveBundle();
3777
3778 // Fold the assume into metadata if it's valid at the load
3779 if (auto *LI = dyn_cast<LoadInst>(Ptr);
3780 LI &&
3781 isValidAssumeForContext(II, LI, &DT, /*AllowEphemerals=*/true)) {
3782 MDNode *MD = MDNode::get(II->getContext(), {});
3783 LI->setMetadata(LLVMContext::MD_nonnull, MD);
3784 LI->setMetadata(LLVMContext::MD_noundef, MD);
3785 return RemoveBundle();
3786 }
3787
3788 if (auto *GEP = dyn_cast<GEPOperator>(Ptr);
3789 GEP && GEP->isInBounds() &&
3790 !NullPointerIsDefined(II->getFunction(),
3791 Ptr->getType()->getPointerAddressSpace())) {
3792 Builder.CreateNonnullAssumption(GEP->stripInBoundsOffsets());
3793 return RemoveBundle();
3794 }
3795
3796 // TODO: apply nonnull return attributes to calls and invokes
3797 break;
3798 }
3799
3800 case BundleAttr::NoUndef: {
3801 auto [Val] = getAssumeNoUndefInfo(OBU);
3802
3804 return RemoveBundle();
3805
3806 if (auto *LI = dyn_cast<LoadInst>(Val);
3807 LI &&
3808 isValidAssumeForContext(II, LI, &DT, /*AllowEphemerals=*/true)) {
3809 LI->setMetadata(LLVMContext::MD_noundef,
3810 MDNode::get(II->getContext(), {}));
3811 return RemoveBundle();
3812 }
3813
3814 } break;
3815
3816 case BundleAttr::SeparateStorage: {
3817 auto [Ptr1, Ptr2] = getAssumeSeparateStorageInfo(OBU);
3818 // Separate storage assumptions apply to the underlying allocations, not
3819 // any particular pointer within them. When evaluating the hints for AA
3820 // purposes we getUnderlyingObject them; by precomputing the answers
3821 // here we can avoid having to do so repeatedly there.
3822 auto MaybeSimplifyHint = [&](const Use &U) {
3823 Value *Hint = U.get();
3824 // Not having a limit is safe because InstCombine removes unreachable
3825 // code.
3826 Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
3827 if (Hint != UnderlyingObject)
3828 replaceUse(const_cast<Use &>(U), UnderlyingObject);
3829 };
3830 MaybeSimplifyHint(Ptr1);
3831 MaybeSimplifyHint(Ptr2);
3832 } break;
3833
3834 // TODO: Drop these assumes when they are redundant
3835 case BundleAttr::DereferenceableOrNull:
3836 break;
3837
3838 // This cannot be simplified
3839 case BundleAttr::Cold:
3840 break;
3841 }
3842 }
3843
3844 // If the assume has operand bundles, the folds below will never work, so
3845 // don't bother trying.
3846 if (II->hasOperandBundles())
3847 break;
3848
3849 Value *IIOperand = II->getArgOperand(0);
3850
3851 // Canonicalize assume(a && b) -> assume(a); assume(b);
3852 // Note: New assumption intrinsics created here are registered by
3853 // the InstCombineIRInserter object.
3854 Value *A, *B;
3855 if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
3856 Builder.CreateAssumption(A);
3857 Builder.CreateAssumption(B);
3858 return eraseInstFromFunction(*II);
3859 }
3860 // assume(!(a || b)) -> assume(!a); assume(!b);
3861 if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
3862 Builder.CreateAssumption(Builder.CreateNot(A));
3863 Builder.CreateAssumption(Builder.CreateNot(B));
3864 return eraseInstFromFunction(*II);
3865 }
3866
3867 // Convert nonnull assume like:
3868 // %A = icmp ne i32* %PTR, null
3869 // call void @llvm.assume(i1 %A)
3870 // into
3871 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3872 if (match(IIOperand,
3874 A->getType()->isPointerTy()) {
3875 Builder.CreateNonnullAssumption(A);
3876 return eraseInstFromFunction(*II);
3877 }
3878
3879 // Convert alignment assume like:
3880 // %B = ptrtoint i32* %A to i64
3881 // %C = and i64 %B, Constant
3882 // %D = icmp eq i64 %C, 0
3883 // call void @llvm.assume(i1 %D)
3884 // into
3885 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
3886 uint64_t AlignMask = 1;
3887 if ((match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
3888 match(IIOperand,
3890 m_And(m_Value(A), m_ConstantInt(AlignMask)),
3891 m_Zero())))) {
3892 if (isPowerOf2_64(AlignMask + 1)) {
3893 uint64_t Offset = 0;
3895 if (match(A, m_PtrToIntOrAddr(m_Value(A)))) {
3896 /// Note: this doesn't preserve the offset information but merges
3897 /// offset and alignment.
3898 /// TODO: we can generate a GEP instead of merging the alignment with
3899 /// the offset.
3900 Builder.CreateAlignmentAssumption(getDataLayout(), A,
3901 MinAlign(Offset, AlignMask + 1));
3902 return eraseInstFromFunction(*II);
3903 }
3904 }
3905 }
3906
3907 // Remove assumes on true/false
3908 if (auto *CI = dyn_cast<ConstantInt>(IIOperand);
3909 CI || isa<UndefValue, PoisonValue>(IIOperand)) {
3910 if (!CI || CI->isZero())
3912 return eraseInstFromFunction(*II);
3913 }
3914
3915 // Update the cache of affected values for this assumption (we might be
3916 // here because we just simplified the condition).
3917 AC.updateAffectedValues(cast<AssumeInst>(II));
3918 break;
3919 }
3920 case Intrinsic::experimental_guard: {
3921 // Is this guard followed by another guard? We scan forward over a small
3922 // fixed window of instructions to handle common cases with conditions
3923 // computed between guards.
3924 Instruction *NextInst = II->getNextNode();
3925 for (unsigned i = 0; i < GuardWideningWindow; i++) {
3926 // Note: Using context-free form to avoid compile time blow up
3927 if (!isSafeToSpeculativelyExecute(NextInst))
3928 break;
3929 NextInst = NextInst->getNextNode();
3930 }
3931 Value *NextCond = nullptr;
3932 if (match(NextInst,
3934 Value *CurrCond = II->getArgOperand(0);
3935
3936 // Remove a guard that is immediately preceded by an identical guard.
3937 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3938 if (CurrCond != NextCond) {
3939 Instruction *MoveI = II->getNextNode();
3940 while (MoveI != NextInst) {
3941 auto *Temp = MoveI;
3942 MoveI = MoveI->getNextNode();
3943 Temp->moveBefore(II->getIterator());
3944 }
3945 replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
3946 }
3947 eraseInstFromFunction(*NextInst);
3948 return II;
3949 }
3950 break;
3951 }
3952 case Intrinsic::vector_insert: {
3953 Value *Vec = II->getArgOperand(0);
3954 Value *SubVec = II->getArgOperand(1);
3955 Value *Idx = II->getArgOperand(2);
3956 auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
3957 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
3958 auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
3959
3960 // Only canonicalize if the destination vector, Vec, and SubVec are all
3961 // fixed vectors.
3962 if (DstTy && VecTy && SubVecTy) {
3963 unsigned DstNumElts = DstTy->getNumElements();
3964 unsigned VecNumElts = VecTy->getNumElements();
3965 unsigned SubVecNumElts = SubVecTy->getNumElements();
3966 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3967
3968 // An insert that entirely overwrites Vec with SubVec is a nop.
3969 if (VecNumElts == SubVecNumElts)
3970 return replaceInstUsesWith(CI, SubVec);
3971
3972 // Widen SubVec into a vector of the same width as Vec, since
3973 // shufflevector requires the two input vectors to be the same width.
3974 // Elements beyond the bounds of SubVec within the widened vector are
3975 // undefined.
3976 SmallVector<int, 8> WidenMask;
3977 unsigned i;
3978 for (i = 0; i != SubVecNumElts; ++i)
3979 WidenMask.push_back(i);
3980 for (; i != VecNumElts; ++i)
3981 WidenMask.push_back(PoisonMaskElem);
3982
3983 Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
3984
3986 for (unsigned i = 0; i != IdxN; ++i)
3987 Mask.push_back(i);
3988 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3989 Mask.push_back(i);
3990 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3991 Mask.push_back(i);
3992
3993 Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
3994 return replaceInstUsesWith(CI, Shuffle);
3995 }
3996 break;
3997 }
3998 case Intrinsic::vector_extract: {
3999 Value *Vec = II->getArgOperand(0);
4000 Value *Idx = II->getArgOperand(1);
4001
4002 Type *ReturnType = II->getType();
4003 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
4004 // ExtractIdx)
4005 unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
4006 Value *InsertTuple, *InsertIdx, *InsertValue;
4008 m_Value(InsertValue),
4009 m_Value(InsertIdx))) &&
4010 InsertValue->getType() == ReturnType) {
4011 unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
4012 // Case where we get the same index right after setting it.
4013 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
4014 // InsertValue
4015 if (ExtractIdx == Index)
4016 return replaceInstUsesWith(CI, InsertValue);
4017 // If we are getting a different index than what was set in the
4018 // insert.vector intrinsic, we can just set the input tuple to the one up
4019 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
4020 // InsertIndex), ExtractIndex)
4021 // --> extract.vector(InsertTuple, ExtractIndex)
4022 else
4023 return replaceOperand(CI, 0, InsertTuple);
4024 }
4025
4026 ConstantInt *ALMUpperBound;
4028 m_Value(), m_ConstantInt(ALMUpperBound)))) {
4029 const auto &Attrs = II->getFunction()->getAttributes().getFnAttrs();
4030 unsigned VScaleMin = Attrs.getVScaleRangeMin();
4031 unsigned ScaleFactor =
4032 cast<VectorType>(ReturnType)->isScalableTy() ? VScaleMin : 1;
4033 if (ExtractIdx * ScaleFactor >= ALMUpperBound->getZExtValue())
4034 return replaceInstUsesWith(CI,
4035 ConstantVector::getNullValue(ReturnType));
4036 }
4037
4038 auto *DstTy = dyn_cast<VectorType>(ReturnType);
4039 auto *VecTy = dyn_cast<VectorType>(Vec->getType());
4040
4041 if (DstTy && VecTy) {
4042 auto DstEltCnt = DstTy->getElementCount();
4043 auto VecEltCnt = VecTy->getElementCount();
4044 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
4045
4046 // Extracting the entirety of Vec is a nop.
4047 if (DstEltCnt == VecTy->getElementCount()) {
4048 replaceInstUsesWith(CI, Vec);
4049 return eraseInstFromFunction(CI);
4050 }
4051
4052 // Only canonicalize to shufflevector if the destination vector and
4053 // Vec are fixed vectors.
4054 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
4055 break;
4056
4058 for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
4059 Mask.push_back(IdxN + i);
4060
4061 Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
4062 return replaceInstUsesWith(CI, Shuffle);
4063 }
4064 break;
4065 }
4066 case Intrinsic::experimental_vp_reverse: {
4067 Value *X;
4068 Value *Vec = II->getArgOperand(0);
4069 Value *Mask = II->getArgOperand(1);
4070 if (!match(Mask, m_AllOnes()))
4071 break;
4072 Value *EVL = II->getArgOperand(2);
4073 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
4074 // rev(unop rev(X)) --> unop X
4075 if (match(Vec,
4077 m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
4078 auto *OldUnOp = cast<UnaryOperator>(Vec);
4080 OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
4081 II->getIterator());
4082 return replaceInstUsesWith(CI, NewUnOp);
4083 }
4084 break;
4085 }
4086 case Intrinsic::vector_reduce_or:
4087 case Intrinsic::vector_reduce_and: {
4088 // Canonicalize logical or/and reductions:
4089 // Or reduction for i1 is represented as:
4090 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
4091 // %res = cmp ne iReduxWidth %val, 0
4092 // And reduction for i1 is represented as:
4093 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
4094 // %res = cmp eq iReduxWidth %val, 11111
4095 Value *Arg = II->getArgOperand(0);
4096 Value *Vect;
4097
4098 if (Value *NewOp =
4099 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4100 replaceUse(II->getOperandUse(0), NewOp);
4101 return II;
4102 }
4103
4104 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4105 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4106 if (FTy->getElementType() == Builder.getInt1Ty()) {
4107 Value *Res = Builder.CreateBitCast(
4108 Vect, Builder.getIntNTy(FTy->getNumElements()));
4109 if (IID == Intrinsic::vector_reduce_and) {
4110 Res = Builder.CreateICmpEQ(
4112 } else {
4113 assert(IID == Intrinsic::vector_reduce_or &&
4114 "Expected or reduction.");
4115 Res = Builder.CreateIsNotNull(Res);
4116 }
4117 if (Arg != Vect)
4118 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4119 II->getType());
4120 return replaceInstUsesWith(CI, Res);
4121 }
4122 }
4123 [[fallthrough]];
4124 }
4125 case Intrinsic::vector_reduce_add: {
4126 if (IID == Intrinsic::vector_reduce_add) {
4127 // Convert vector_reduce_add(ZExt(<n x i1>)) to
4128 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4129 // Convert vector_reduce_add(SExt(<n x i1>)) to
4130 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4131 // Convert vector_reduce_add(<n x i1>) to
4132 // Trunc(ctpop(bitcast <n x i1> to in)).
4133 Value *Arg = II->getArgOperand(0);
4134 Value *Vect;
4135
4136 if (Value *NewOp =
4137 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4138 replaceUse(II->getOperandUse(0), NewOp);
4139 return II;
4140 }
4141
4142 // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
4143 if (Value *Splat = getSplatValue(Arg)) {
4144 ElementCount VecToReduceCount =
4145 cast<VectorType>(Arg->getType())->getElementCount();
4146 if (VecToReduceCount.isFixed()) {
4147 unsigned VectorSize = VecToReduceCount.getFixedValue();
4148 return BinaryOperator::CreateMul(
4149 Splat,
4150 ConstantInt::get(Splat->getType(), VectorSize, /*IsSigned=*/false,
4151 /*ImplicitTrunc=*/true));
4152 }
4153 }
4154
4155 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4156 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4157 if (FTy->getElementType() == Builder.getInt1Ty()) {
4158 Value *V = Builder.CreateBitCast(
4159 Vect, Builder.getIntNTy(FTy->getNumElements()));
4160 Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
4161 Res = Builder.CreateZExtOrTrunc(Res, II->getType());
4162 if (Arg != Vect &&
4163 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
4164 Res = Builder.CreateNeg(Res);
4165 return replaceInstUsesWith(CI, Res);
4166 }
4167 }
4168 }
4169 [[fallthrough]];
4170 }
4171 case Intrinsic::vector_reduce_xor: {
4172 if (IID == Intrinsic::vector_reduce_xor) {
4173 // Exclusive disjunction reduction over the vector with
4174 // (potentially-extended) i1 element type is actually a
4175 // (potentially-extended) arithmetic `add` reduction over the original
4176 // non-extended value:
4177 // vector_reduce_xor(?ext(<n x i1>))
4178 // -->
4179 // ?ext(vector_reduce_add(<n x i1>))
4180 Value *Arg = II->getArgOperand(0);
4181 Value *Vect;
4182
4183 if (Value *NewOp =
4184 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4185 replaceUse(II->getOperandUse(0), NewOp);
4186 return II;
4187 }
4188
4189 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4190 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4191 if (VTy->getElementType() == Builder.getInt1Ty()) {
4192 Value *Res = Builder.CreateAddReduce(Vect);
4193 if (Arg != Vect)
4194 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4195 II->getType());
4196 return replaceInstUsesWith(CI, Res);
4197 }
4198 }
4199 }
4200 [[fallthrough]];
4201 }
4202 case Intrinsic::vector_reduce_mul: {
4203 if (IID == Intrinsic::vector_reduce_mul) {
4204 Value *Arg = II->getArgOperand(0);
4205
4206 if (Value *NewOp =
4207 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4208 replaceUse(II->getOperandUse(0), NewOp);
4209 return II;
4210 }
4211
4212 // vector_reduce_mul(zext(<n x i1>)), or
4213 // vector_reduce_mul(sext(<n x i1>)) (if n is even) -->
4214 // zext(vector_reduce_and(<n x i1>)).
4215 // (The sext case doesn't work if n is odd because multiplying an odd
4216 // number of -1's produces -1, not 1.)
4217 Value *Vect;
4218 bool IsZext = match(Arg, m_ZExt(m_Value(Vect))) &&
4219 Vect->getType()->isIntOrIntVectorTy(1);
4220 bool IsSext =
4221 match(Arg, m_SExt(m_Value(Vect))) &&
4222 Vect->getType()->isIntOrIntVectorTy(1) &&
4223 cast<VectorType>(Vect->getType())->getElementCount().isKnownEven();
4224 if (IsZext || IsSext) {
4225 Value *Res = Builder.CreateAndReduce(Vect);
4226 return CastInst::Create(Instruction::ZExt, Res, II->getType());
4227 }
4228
4229 // vector_reduce_mul(<n x i1>) --> vector_reduce_and(<n x i1>)
4230 if (Arg->getType()->isIntOrIntVectorTy(1))
4231 return replaceInstUsesWith(CI, Builder.CreateAndReduce(Arg));
4232 }
4233 [[fallthrough]];
4234 }
4235 case Intrinsic::vector_reduce_umin:
4236 case Intrinsic::vector_reduce_umax: {
4237 if (IID == Intrinsic::vector_reduce_umin ||
4238 IID == Intrinsic::vector_reduce_umax) {
4239 // UMin/UMax reduction over the vector with (potentially-extended)
4240 // i1 element type is actually a (potentially-extended)
4241 // logical `and`/`or` reduction over the original non-extended value:
4242 // vector_reduce_u{min,max}(?ext(<n x i1>))
4243 // -->
4244 // ?ext(vector_reduce_{and,or}(<n x i1>))
4245 Value *Arg = II->getArgOperand(0);
4246 Value *Vect;
4247
4248 if (Value *NewOp =
4249 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4250 replaceUse(II->getOperandUse(0), NewOp);
4251 return II;
4252 }
4253
4254 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4255 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4256 if (VTy->getElementType() == Builder.getInt1Ty()) {
4257 Value *Res = IID == Intrinsic::vector_reduce_umin
4258 ? Builder.CreateAndReduce(Vect)
4259 : Builder.CreateOrReduce(Vect);
4260 if (Arg != Vect)
4261 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4262 II->getType());
4263 return replaceInstUsesWith(CI, Res);
4264 }
4265 }
4266 }
4267 [[fallthrough]];
4268 }
4269 case Intrinsic::vector_reduce_smin:
4270 case Intrinsic::vector_reduce_smax: {
4271 if (IID == Intrinsic::vector_reduce_smin ||
4272 IID == Intrinsic::vector_reduce_smax) {
4273 // SMin/SMax reduction over the vector with (potentially-extended)
4274 // i1 element type is actually a (potentially-extended)
4275 // logical `and`/`or` reduction over the original non-extended value:
4276 // vector_reduce_s{min,max}(<n x i1>)
4277 // -->
4278 // vector_reduce_{or,and}(<n x i1>)
4279 // and
4280 // vector_reduce_s{min,max}(sext(<n x i1>))
4281 // -->
4282 // sext(vector_reduce_{or,and}(<n x i1>))
4283 // and
4284 // vector_reduce_s{min,max}(zext(<n x i1>))
4285 // -->
4286 // zext(vector_reduce_{and,or}(<n x i1>))
4287 Value *Arg = II->getArgOperand(0);
4288 Value *Vect;
4289
4290 if (Value *NewOp =
4291 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4292 replaceUse(II->getOperandUse(0), NewOp);
4293 return II;
4294 }
4295
4296 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4297 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4298 if (VTy->getElementType() == Builder.getInt1Ty()) {
4299 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
4300 if (Arg != Vect)
4301 ExtOpc = cast<CastInst>(Arg)->getOpcode();
4302 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
4303 (ExtOpc == Instruction::CastOps::ZExt))
4304 ? Builder.CreateAndReduce(Vect)
4305 : Builder.CreateOrReduce(Vect);
4306 if (Arg != Vect)
4307 Res = Builder.CreateCast(ExtOpc, Res, II->getType());
4308 return replaceInstUsesWith(CI, Res);
4309 }
4310 }
4311 }
4312 [[fallthrough]];
4313 }
4314 case Intrinsic::vector_reduce_fmax:
4315 case Intrinsic::vector_reduce_fmin:
4316 case Intrinsic::vector_reduce_fadd:
4317 case Intrinsic::vector_reduce_fmul: {
4318 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
4319 IID != Intrinsic::vector_reduce_fmul) ||
4320 II->hasAllowReassoc();
4321 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
4322 IID == Intrinsic::vector_reduce_fmul)
4323 ? 1
4324 : 0;
4325 Value *Arg = II->getArgOperand(ArgIdx);
4326 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
4327 replaceUse(II->getOperandUse(ArgIdx), NewOp);
4328 return nullptr;
4329 }
4330 break;
4331 }
4332 case Intrinsic::is_fpclass: {
4333 if (Instruction *I = foldIntrinsicIsFPClass(*II))
4334 return I;
4335 break;
4336 }
4337 case Intrinsic::threadlocal_address: {
4338 Align MinAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
4339 MaybeAlign Align = II->getRetAlign();
4340 if (MinAlign > Align.valueOrOne()) {
4341 II->addRetAttr(Attribute::getWithAlignment(II->getContext(), MinAlign));
4342 return II;
4343 }
4344 break;
4345 }
4346 case Intrinsic::fptoui_sat:
4347 case Intrinsic::fptosi_sat:
4348 if (Instruction *I = foldItoFPtoI(*II))
4349 return I;
4350 break;
4351 case Intrinsic::frexp: {
4352 // frexp(frexp(x).fract) -> { frexp(x).fract, 0 }: the fraction operand is
4353 // already normalized, so the first result is idempotent and the second is
4354 // zero.
4355 if (match(II->getArgOperand(0),
4357 Value *Res = Builder.CreateInsertValue(PoisonValue::get(II->getType()),
4358 II->getArgOperand(0), 0);
4359 Res = Builder.CreateInsertValue(
4360 Res, Constant::getNullValue(II->getType()->getStructElementType(1)),
4361 1);
4362 return replaceInstUsesWith(*II, Res);
4363 }
4364 break;
4365 }
4366 case Intrinsic::get_active_lane_mask: {
4367 const APInt *Op0, *Op1;
4368 if (match(II->getOperand(0), m_StrictlyPositive(Op0)) &&
4369 match(II->getOperand(1), m_APInt(Op1))) {
4370 Type *OpTy = II->getOperand(0)->getType();
4371 return replaceInstUsesWith(
4372 *II, Builder.CreateIntrinsic(
4373 II->getType(), Intrinsic::get_active_lane_mask,
4374 {Constant::getNullValue(OpTy),
4375 ConstantInt::get(OpTy, Op1->usub_sat(*Op0))}));
4376 }
4377 break;
4378 }
4379 case Intrinsic::experimental_get_vector_length: {
4380 // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
4381 unsigned BitWidth =
4382 std::max(II->getArgOperand(0)->getType()->getScalarSizeInBits(),
4383 II->getType()->getScalarSizeInBits());
4384 ConstantRange Cnt =
4385 computeConstantRangeIncludingKnownBits(II->getArgOperand(0), false,
4386 SQ.getWithInstruction(II))
4388 ConstantRange MaxLanes = cast<ConstantInt>(II->getArgOperand(1))
4389 ->getValue()
4390 .zextOrTrunc(Cnt.getBitWidth());
4391 if (cast<ConstantInt>(II->getArgOperand(2))->isOne())
4392 MaxLanes = MaxLanes.multiply(
4393 getVScaleRange(II->getFunction(), Cnt.getBitWidth()));
4394
4395 if (Cnt.icmp(CmpInst::ICMP_ULE, MaxLanes))
4396 return replaceInstUsesWith(
4397 *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
4398 return nullptr;
4399 }
4400 default: {
4401 // Handle target specific intrinsics
4402 std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
4403 if (V)
4404 return *V;
4405 break;
4406 }
4407 }
4408
4409 // Try to fold intrinsic into select/phi operands. This is legal if:
4410 // * The intrinsic is speculatable.
4411 // * The operand is one of the following:
4412 // - a phi.
4413 // - a select with a scalar condition.
4414 // - a select with a vector condition and II is not a cross lane operation.
4416 for (Value *Op : II->args()) {
4417 if (auto *Sel = dyn_cast<SelectInst>(Op)) {
4418 bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4419 if (IsVectorCond &&
4420 (!isNotCrossLaneOperation(II) || !II->getType()->isVectorTy()))
4421 continue;
4422 // Don't replace a scalar select with a more expensive vector select if
4423 // we can't simplify both arms of the select.
4424 bool SimplifyBothArms =
4425 !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4427 *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
4428 return R;
4429 }
4430 if (auto *Phi = dyn_cast<PHINode>(Op))
4431 if (Instruction *R = foldOpIntoPhi(*II, Phi))
4432 return R;
4433 }
4434 }
4435
4437 return Shuf;
4438
4440 return replaceInstUsesWith(*II, Reverse);
4441
4443 return replaceInstUsesWith(*II, Res);
4444
4445 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4446 // context, so it is handled in visitCallBase and we should trigger it.
4447 return visitCallBase(*II);
4448}
4449
// Drops the fence FI when a neighbouring fence makes it redundant: either the
// next fence is identical to it, or the next/previous fence has the same sync
// scope (system or single-thread only) and an ordering at least as strong.
// Returns the result of eraseInstFromFunction(FI) when FI is removed and
// nullptr otherwise.
// NOTE(review): listing line 4451, which should hold this function's header
// (and the declaration of FI), is missing from this extract.
4450 // Fence instruction simplification
4452 auto *NFI = dyn_cast<FenceInst>(FI.getNextNode());
4453 // This check is solely here to handle arbitrary target-dependent syncscopes.
4454 // TODO: Can remove if does not matter in practice.
4455 if (NFI && FI.isIdenticalTo(NFI))
4456 return eraseInstFromFunction(FI);
4457
4458 // Returns true if FI1 is identical or stronger fence than FI2.
4459 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4460 auto FI1SyncScope = FI1->getSyncScopeID();
4461 // Consider same scope, where scope is global or single-thread.
4462 if (FI1SyncScope != FI2->getSyncScopeID() ||
4463 (FI1SyncScope != SyncScope::System &&
4464 FI1SyncScope != SyncScope::SingleThread))
4465 return false;
4466
4467 return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
4468 };
// The following fence already covers FI.
4469 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4470 return eraseInstFromFunction(FI);
4471
// Same test against the preceding instruction; dyn_cast_or_null tolerates a
// null getPrevNode() result.
4472 if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNode()))
4473 if (isIdenticalOrStrongerFence(PFI, &FI))
4474 return eraseInstFromFunction(FI);
4475 return nullptr;
4476 }
4477
// Forwards the invoke II to visitCallBase and returns its result.
// NOTE(review): listing line 4479, which should hold this function's header
// (and the declaration of II), is missing from this extract.
4478 // InvokeInst simplification
4480 return visitCallBase(II);
4481 }
4482
// Forwards the callbr CBI to visitCallBase and returns its result.
// NOTE(review): listing line 4484, which should hold this function's header
// (and the declaration of CBI), is missing from this extract.
4483 // CallBrInst simplification
4485 return visitCallBase(CBI);
4486 }
4487
// Collects the conversion characters used in FormatStr into a 256-bit set
// indexed by character value and returns that set.
// NOTE(review): listing line 4492, which should hold this function's header
// (and the declaration of FormatStr), is missing from this extract.
4488 // A simple parser for format string specifiers for the purposes of the
4489 // modular-format attribute. In the case of malformed format strings this might
4490 // under or over report the specifiers present, but such cases are undefined
4491 // behavior.
4493 Bitset<256> Specifiers;
4494 for (size_t I = 0; I < FormatStr.size(); ++I) {
4495 if (FormatStr[I] != '%')
4496 continue;
4497
4498 // Check for escaped '%'.
4499 if (I + 1 < FormatStr.size() && FormatStr[I + 1] == '%') {
4500 ++I; // Skip the second '%'.
4501 continue;
4502 }
4503
4504 // Scan past allowed prefix characters.
// The first character outside this set is recorded as the specifier. A '%'
// followed only by prefix characters up to the end of the string records
// nothing and ends the scan.
4505 size_t J =
4506 FormatStr.find_first_not_of("0123456789-+ #0$.*'hlLjztqwvI", I + 1);
4507 if (J == StringRef::npos)
4508 break;
4509
4510 Specifiers.set(static_cast<unsigned char>(FormatStr[J]));
4511 I = J; // Resume search from after the specifier.
4512 }
4513 return Specifiers;
4514 }
4515
4516static bool isAspectNeeded(StringRef Aspect, CallInst *CI,
4517 std::optional<unsigned> FirstArgIdx,
4518 const std::optional<Bitset<256>> &Specifiers) {
4519 if (Aspect == "float") {
4520 if (Specifiers) {
4521 static constexpr Bitset<256> FloatSpecifiers{'f', 'F', 'e', 'E',
4522 'g', 'G', 'a', 'A'};
4523 return (*Specifiers & FloatSpecifiers).any();
4524 }
4525 // Fallback to type-based check for dynamic format string.
4526 if (!FirstArgIdx)
4527 return true;
4528 return llvm::any_of(
4529 llvm::make_range(std::next(CI->arg_begin(), *FirstArgIdx),
4530 CI->arg_end()),
4531 [](Value *V) { return V->getType()->isFloatingPointTy(); });
4532 }
4533 if (Aspect == "fixed") {
4534 if (Specifiers) {
4535 static constexpr Bitset<256> FixedSpecifiers{'r', 'R', 'k', 'K'};
4536 return (*Specifiers & FixedSpecifiers).any();
4537 }
4538 // Fallback for fixed-point: assume needed if format is dynamic.
4539 return true;
4540 }
4541 // Unknown aspects are always considered to be needed.
4542 return true;
4543}
4544
4545static void referenceAspect(StringRef Aspect, StringRef ImplName, Module *M,
4546 IRBuilderBase &B) {
4547 SmallString<20> Name = ImplName;
4548 Name += '_';
4549 Name += Aspect;
4550 LLVMContext &Ctx = M->getContext();
4551 Function *RelocNoneFn =
4552 Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
4553 B.CreateCall(RelocNoneFn,
4554 {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))});
4555}
4556
// Handles calls carrying the "modular-format" function attribute. The
// attribute value is split on ','; at least five fields are required and
// fields 1-4 are read as: 1-based format-argument index, 1-based index of the
// first formatted argument (0 meaning none), the name of the modular function,
// and the implementation name used when referencing aspects.
//
// When not every aspect in AllAspects is needed, the call is cloned, the clone
// is retargeted to the function named by FnName (declared with the callee's
// type and attributes minus "modular-format"), inserted through B, and
// followed by one referenceAspect() call per needed aspect in sorted order.
// The clone is returned. In every other case nullptr is returned.
//
// NOTE(review): listing lines 4557 (function header), 4561 (start of the
// statement that declares Args) and 4570 (presumably the declaration of
// AllAspects) are missing from this extract.
4558 if (!CI->hasFnAttr("modular-format"))
4559 return nullptr;
4560
4562 llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
4563 if (Args.size() < 5)
4564 return nullptr;
4565
4566 StringRef FormatIdxStr = Args[1];
4567 StringRef FirstArgIdxStr = Args[2];
4568 StringRef FnName = Args[3];
4569 StringRef ImplName = Args[4];
4571
// NOTE(review): parse failures below are only caught by assert; confirm that
// a malformed attribute cannot reach this point in builds without assertions.
4572 unsigned FormatIdx;
4573 std::optional<unsigned> FirstArgIdx;
4574 [[maybe_unused]] bool Error;
4575 Error = FormatIdxStr.getAsInteger(10, FormatIdx);
4576 assert(!Error && "invalid format arg index");
4577 --FormatIdx; // 1-based to 0-based
4578
4579 FirstArgIdx.emplace();
4580 Error = FirstArgIdxStr.getAsInteger(10, *FirstArgIdx);
4581 assert(!Error && "invalid first arg index");
4582 if (*FirstArgIdx > 0)
4583 --*FirstArgIdx; // 1-based to 0-based
4584 else
4585 FirstArgIdx.reset();
4586
4587 if (AllAspects.empty())
4588 return nullptr;
4589
4590 Value *FormatVal = CI->getArgOperand(FormatIdx);
4591 StringRef FormatStr;
4592
// Specifiers stays empty when the format string is not a known constant;
// isAspectNeeded() then uses its fallback checks.
4593 std::optional<Bitset<256>> Specifiers;
4594 if (getConstantStringInfo(FormatVal, FormatStr))
4595 Specifiers = parseFormatStringSpecifiers(FormatStr);
4596
4597 SmallVector<StringRef> NeededAspects;
4598 for (StringRef Aspect : AllAspects)
4599 if (isAspectNeeded(Aspect, CI, FirstArgIdx, Specifiers))
4600 NeededAspects.push_back(Aspect);
4601
// Nothing can be dropped, so leave the original call alone.
4602 if (NeededAspects.size() == AllAspects.size())
4603 return nullptr;
4604
4605 Module *M = CI->getModule();
4606 LLVMContext &Ctx = M->getContext();
4607 Function *Callee = CI->getCalledFunction();
4608 FunctionCallee ModularFn = M->getOrInsertFunction(
4609 FnName, Callee->getFunctionType(),
4610 Callee->getAttributes().removeFnAttribute(Ctx, "modular-format"));
4611 CallInst *New = cast<CallInst>(CI->clone());
4612 New->setCalledFunction(ModularFn);
4613 New->removeFnAttr("modular-format");
4614 B.Insert(New);
4615
// Sort so the emitted references come out in a fixed order.
4616 llvm::sort(NeededAspects);
4617 for (StringRef Request : NeededAspects)
4618 referenceAspect(Request, ImplName, M, B);
4619
4620 return New;
4621 }
4622
// Tries the library-call simplifications on CI: first
// LibCallSimplifier::optimizeCall, then optimizeModularFormat. When either
// produces a value, NumSimplified is incremented and the function returns CI
// itself if CI has no uses, or the result of replaceInstUsesWith(*CI, With)
// otherwise. Returns nullptr when CI->getCalledFunction() is null, when CI is
// a musttail or notail call, or when neither simplification applies.
// NOTE(review): listing line 4636 (the body of the InstCombineErase lambda) is
// missing from this extract.
4623 Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
4624 if (!CI->getCalledFunction()) return nullptr;
4625
4626 // Skip optimizing notail and musttail calls so
4627 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4628 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4629 if (CI->isMustTailCall() || CI->isNoTailCall())
4630 return nullptr;
4631
// Callbacks handed to LibCallSimplifier so that its replacements and erasures
// go through this combiner.
4632 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4633 replaceInstUsesWith(*From, With);
4634 };
4635 auto InstCombineErase = [this](Instruction *I) {
4637 };
4638 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4639 InstCombineRAUW, InstCombineErase);
4640 if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
4641 ++NumSimplified;
4642 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4643 }
4644 if (Value *With = optimizeModularFormat(CI, Builder)) {
4645 ++NumSimplified;
4646 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4647 }
4648
4649 return nullptr;
4650 }
4651
// Looks for the single init.trampoline call that initializes TrampMem.
// Succeeds only when TrampMem is (a single-use pointer cast of) an alloca,
// every user of TrampMem is an init.trampoline or adjust.trampoline
// intrinsic, exactly one init.trampoline is among them, and that call's
// operand 0 is TrampMem. Returns that call, or nullptr otherwise.
// NOTE(review): listing lines 4652 (function header) and 4664 (which should
// introduce II from the user U) are missing from this extract.
4653 // Strip off at most one level of pointer casts, looking for an alloca. This
4654 // is good enough in practice and simpler than handling any number of casts.
4655 Value *Underlying = TrampMem->stripPointerCasts();
4656 if (Underlying != TrampMem &&
4657 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4658 return nullptr;
4659 if (!isa<AllocaInst>(Underlying))
4660 return nullptr;
4661
4662 IntrinsicInst *InitTrampoline = nullptr;
4663 for (User *U : TrampMem->users()) {
4665 if (!II)
4666 return nullptr;
4667 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4668 if (InitTrampoline)
4669 // More than one init_trampoline writes to this value. Give up.
4670 return nullptr;
4671 InitTrampoline = II;
4672 continue;
4673 }
4674 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4675 // Allow any number of calls to adjust.trampoline.
4676 continue;
// Any other intrinsic user defeats the analysis.
4677 return nullptr;
4678 }
4679
4680 // No call to init.trampoline found.
4681 if (!InitTrampoline)
4682 return nullptr;
4683
4684 // Check that the alloca is being used in the expected way.
4685 if (InitTrampoline->getOperand(0) != TrampMem)
4686 return nullptr;
4687
4688 return InitTrampoline;
4689 }
4690
// Walks backwards from AdjustTramp to the start of its basic block and returns
// the first init.trampoline found whose operand 0 is TrampMem. The walk stops
// with nullptr at any other instruction that may write to memory, and returns
// nullptr when the start of the block is reached.
// NOTE(review): listing lines 4691 (first line of the function header) and
// 4699 (which should introduce II from Inst) are missing from this extract.
4692 Value *TrampMem) {
4693 // Visit all the previous instructions in the basic block, and try to find a
4694 // init.trampoline which has a direct path to the adjust.trampoline.
4695 for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4696 E = AdjustTramp->getParent()->begin();
4697 I != E;) {
4698 Instruction *Inst = &*--I;
4700 if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4701 II->getOperand(0) == TrampMem)
4702 return II;
4703 if (Inst->mayWriteToMemory())
4704 return nullptr;
4705 }
4706 return nullptr;
4707 }
4708
// NOTE(review): listing lines 4712 (function header) and 4721 (the condition
// guarding the first "return IT;", which should declare IT) are missing from
// this extract.
4709 // Given a call to llvm.adjust.trampoline, find and return the corresponding
4710 // call to llvm.init.trampoline if the call to the trampoline can be optimized
4711 // to a direct call to a function. Otherwise return NULL.
4713 Callee = Callee->stripPointerCasts();
4714 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4715 if (!AdjustTramp ||
4716 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4717 return nullptr;
4718
// Operand 0 of adjust.trampoline is the trampoline memory to search from.
4719 Value *TrampMem = AdjustTramp->getOperand(0);
4720
4722 return IT;
// Otherwise search the preceding instructions of the same basic block.
4723 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4724 return IT;
4725 return nullptr;
4726 }
4727
// Folds a call whose callee is a no-op inttoptr of a ptrauth.sign or
// ptrauth.resign intrinsic result and which carries a "ptrauth" operand
// bundle. When the bundle operands match the intrinsic's operands, returns a
// new call (built with CallBase::Create from Call and the rewritten bundle
// list) that calls the intrinsic's operand 0, cast to the original callee
// type. Returns nullptr when the pattern does not match.
// NOTE(review): listing line 4744 (presumably the declaration of NewBundles)
// is missing from this extract.
4728 Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4729 const Value *Callee = Call.getCalledOperand();
4730 const auto *IPC = dyn_cast<IntToPtrInst>(Callee);
4731 if (!IPC || !IPC->isNoopCast(DL))
4732 return nullptr;
4733
4734 const auto *II = dyn_cast<IntrinsicInst>(IPC->getOperand(0));
4735 if (!II)
4736 return nullptr;
4737
4738 Intrinsic::ID IIID = II->getIntrinsicID();
4739 if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4740 return nullptr;
4741
4742 // Isolate the ptrauth bundle from the others.
4743 std::optional<OperandBundleUse> PtrAuthBundleOrNone;
4745 for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4746 OperandBundleUse Bundle = Call.getOperandBundleAt(BI);
4747 if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4748 PtrAuthBundleOrNone = Bundle;
4749 else
4750 NewBundles.emplace_back(Bundle);
4751 }
4752
4753 if (!PtrAuthBundleOrNone)
4754 return nullptr;
4755
4756 Value *NewCallee = nullptr;
4757 switch (IIID) {
4758 // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4759 // assuming the call bundle and the sign operands match.
4760 case Intrinsic::ptrauth_resign: {
4761 // Resign result key should match bundle.
4762 if (II->getOperand(3) != PtrAuthBundleOrNone->Inputs[0])
4763 return nullptr;
4764 // Resign result discriminator should match bundle.
4765 if (II->getOperand(4) != PtrAuthBundleOrNone->Inputs[1])
4766 return nullptr;
4767
4768 // Resign input (auth) key should also match: we can't change the key on
4769 // the new call we're generating, because we don't know what keys are valid.
4770 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4771 return nullptr;
4772
// The new call gets a "ptrauth" bundle built from resign's operands 1 and 2.
4773 Value *NewBundleOps[] = {II->getOperand(1), II->getOperand(2)};
4774 NewBundles.emplace_back("ptrauth", NewBundleOps);
4775 NewCallee = II->getOperand(0);
4776 break;
4777 }
4778
4779 // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4780 // assuming the call bundle and the sign operands match.
4781 // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4782 case Intrinsic::ptrauth_sign: {
4783 // Sign key should match bundle.
4784 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4785 return nullptr;
4786 // Sign discriminator should match bundle.
4787 if (II->getOperand(2) != PtrAuthBundleOrNone->Inputs[1])
4788 return nullptr;
4789 NewCallee = II->getOperand(0);
4790 break;
4791 }
4792 default:
4793 llvm_unreachable("unexpected intrinsic ID");
4794 }
4795
4796 if (!NewCallee)
4797 return nullptr;
4798
4799 NewCallee = Builder.CreateBitOrPointerCast(NewCallee, Callee->getType());
4800 CallBase *NewCall = CallBase::Create(&Call, NewBundles);
4801 NewCall->setCalledOperand(NewCallee);
4802 return NewCall;
4803 }
4804
// Turns a call through a ptrauth constant into a direct call to the function
// that constant is based on, provided the call's ptrauth bundle (key and
// discriminator) is known to be compatible with the constant. Returns the new
// call, or nullptr when the pattern does not match.
// NOTE(review): listing lines 4806 (declaration of CPA), 4816 (declaration of
// PAB) and 4828 (creation of NewCall) are missing from this extract.
4805 Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
4807 if (!CPA)
4808 return nullptr;
4809
4810 auto *CalleeF = dyn_cast<Function>(CPA->getPointer());
4811 // If the ptrauth constant isn't based on a function pointer, bail out.
4812 if (!CalleeF)
4813 return nullptr;
4814
4815 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
4817 if (!PAB)
4818 return nullptr;
4819
// Bundle input 0 is the key (cast to ConstantInt here), input 1 the
// discriminator.
4820 auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
4821 Value *Discriminator = PAB->Inputs[1];
4822
4823 // If the bundle doesn't match, this is probably going to fail to auth.
4824 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4825 return nullptr;
4826
4827 // If the bundle matches the constant, proceed in making this a direct call.
4829 NewCall->setCalledOperand(CalleeF);
4830 return NewCall;
4831 }
4832
// Adds return attributes to an allocation call based on what
// getAllocSize() and getAllocAlignment() report for it. Only pointer-returning
// calls are considered. Returns true when an attribute was (or, for the size
// attributes, appears to have been) newly added.
// NOTE(review): listing lines 4850 and 4854 (the starts of the statements that
// consume Size->getLimitedValue()) and 4871-4872 (the statements that apply
// NewAlign) are missing from this extract.
4833 bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4834 const TargetLibraryInfo *TLI) {
4835 // Note: We only handle cases which can't be driven from generic attributes
4836 // here. So, for example, nonnull and noalias (which are common properties
4837 // of some allocation functions) are expected to be handled via annotation
4838 // of the respective allocator declaration with generic attributes.
4839 bool Changed = false;
4840
4841 if (!Call.getType()->isPointerTy())
4842 return Changed;
4843
4844 std::optional<APInt> Size = getAllocSize(&Call, TLI);
4845 if (Size && *Size != 0) {
4846 // TODO: We really should just emit deref_or_null here and then
4847 // let the generic inference code combine that with nonnull.
// Changed is set only when the corresponding attribute was not already there.
4848 if (Call.hasRetAttr(Attribute::NonNull)) {
4849 Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
4851 Call.getContext(), Size->getLimitedValue()));
4852 } else {
4853 Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
4855 Call.getContext(), Size->getLimitedValue()));
4856 }
4857 }
4858
4859 // Add alignment attribute if alignment is a power of two constant.
4860 Value *Alignment = getAllocAlignment(&Call, TLI);
4861 if (!Alignment)
4862 return Changed;
4863
4864 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
4865 if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
4866 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4867 if (llvm::isPowerOf2_64(AlignmentVal)) {
// Only act when the new alignment is larger than the existing return
// alignment (treated as 1 when absent).
4868 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4869 Align NewAlign = Align(AlignmentVal);
4870 if (NewAlign > ExistingAlign) {
4873 Changed = true;
4874 }
4875 }
4876 }
4877 return Changed;
4878 }
4879
// Applies, in order: allocation-site annotation, nonnull inference on pointer
// arguments, moving callee casts onto arguments, calling-convention and
// null/undef callee handling, trampoline and ptrauth folds, nounwind marking
// for inline asm, library-call simplification, forwarding of a `returned`
// argument, removal of stale callee_type metadata and kcfi bundles,
// removable-allocation handling, and gc.statepoint clean-up. Returns a
// replacement instruction when one of the folds produces one, &Call when the
// call was only modified in place, and nullptr otherwise.
// NOTE(review): this extract is missing listing lines 4897, 4924 (presumably
// the declaration of Callee), 4936, 4945-4947, 4953, 4973, 4978, 4986-4987,
// 5006, 5039 (presumably the declaration of the kcfi Bundle), 5058, 5074,
// 5083-5084, 5094-5095 and 5125.
4880 /// Improvements for call, callbr and invoke instructions.
4881 Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4882 bool Changed = annotateAnyAllocSite(Call, &TLI);
4883
4884 // Mark any parameters that are known to be non-null with the nonnull
4885 // attribute. This is helpful for inlining calls to functions with null
4886 // checks on their arguments.
4887 SmallVector<unsigned, 4> ArgNos;
4888 unsigned ArgNo = 0;
4889
4890 for (Value *V : Call.args()) {
4891 if (V->getType()->isPointerTy()) {
4892 // Simplify the nonnull operand if the parameter is known to be nonnull.
4893 // Otherwise, try to infer nonnull for it.
4894 bool HasDereferenceable = Call.getParamDereferenceableBytes(ArgNo) > 0;
4895 if (Call.paramHasAttr(ArgNo, Attribute::NonNull) ||
4896 (HasDereferenceable &&
4898 V->getType()->getPointerAddressSpace()))) {
4899 if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4900 replaceOperand(Call, ArgNo, Res);
4901 Changed = true;
4902 }
4903 } else if (isKnownNonZero(V,
4904 getSimplifyQuery().getWithInstruction(&Call))) {
4905 ArgNos.push_back(ArgNo);
4906 }
4907 }
4908 ArgNo++;
4909 }
4910
4911 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4912
// Attach nonnull to every argument position collected above in one update.
4913 if (!ArgNos.empty()) {
4914 AttributeList AS = Call.getAttributes();
4915 LLVMContext &Ctx = Call.getContext();
4916 AS = AS.addParamAttribute(Ctx, ArgNos,
4917 Attribute::get(Ctx, Attribute::NonNull));
4918 Call.setAttributes(AS);
4919 Changed = true;
4920 }
4921
4922 // If the callee is a pointer to a function, attempt to move any casts to the
4923 // arguments of the call/callbr/invoke.
4925 Function *CalleeF = dyn_cast<Function>(Callee);
// transformConstExprCastCall() erases the original call itself when it
// succeeds, hence the nullptr return.
4926 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
4927 transformConstExprCastCall(Call))
4928 return nullptr;
4929
4930 if (CalleeF) {
4931 // Remove the convergent attr on calls when the callee is not convergent.
4932 if (Call.isConvergent() && !CalleeF->isConvergent() &&
4933 !CalleeF->isIntrinsic()) {
4934 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4935 << "\n");
4937 return &Call;
4938 }
4939
4940 // If the call and callee calling conventions don't match, and neither one
4941 // of the calling conventions is compatible with C calling convention
4942 // this call must be unreachable, as the call is undefined.
4943 if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4944 !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
4948 // Only do this for calls to a function with a body. A prototype may
4949 // not actually end up matching the implementation's calling conv for a
4950 // variety of reasons (e.g. it may be written in assembly).
4951 !CalleeF->isDeclaration()) {
4952 Instruction *OldCall = &Call;
4954 // If OldCall does not return void then replaceInstUsesWith poison.
4955 // This allows ValueHandlers and custom metadata to adjust itself.
4956 if (!OldCall->getType()->isVoidTy())
4957 replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
4958 if (isa<CallInst>(OldCall))
4959 return eraseInstFromFunction(*OldCall);
4960
4961 // We cannot remove an invoke or a callbr, because it would change the
4962 // CFG, just change the callee to a null pointer.
4963 cast<CallBase>(OldCall)->setCalledFunction(
4964 CalleeF->getFunctionType(),
4965 Constant::getNullValue(CalleeF->getType()));
4966 return nullptr;
4967 }
4968 }
4969
4970 // Calling a null function pointer is undefined if a null address isn't
4971 // dereferenceable.
4972 if ((isa<ConstantPointerNull>(Callee) &&
4974 isa<UndefValue>(Callee)) {
4975 // If Call does not return void then replaceInstUsesWith poison.
4976 // This allows ValueHandlers and custom metadata to adjust itself.
4977 if (!Call.getType()->isVoidTy())
4979
4980 if (Call.isTerminator()) {
4981 // Can't remove an invoke or callbr because we cannot change the CFG.
4982 return nullptr;
4983 }
4984
4985 // This instruction is not reachable, just remove it.
4988 }
4989
4990 if (IntrinsicInst *II = findInitTrampoline(Callee))
4991 return transformCallThroughTrampoline(Call, *II);
4992
4993 // Combine calls involving pointer authentication intrinsics.
4994 if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
4995 return NewCall;
4996
4997 // Combine calls to ptrauth constants.
4998 if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
4999 return NewCall;
5000
5001 if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
5002 InlineAsm *IA = cast<InlineAsm>(Callee);
5003 if (!IA->canThrow()) {
5004 // Normal inline asm calls cannot throw - mark them
5005 // 'nounwind'.
5007 Changed = true;
5008 }
5009 }
5010
5011 // Try to optimize the call if possible, we require DataLayout for most of
5012 // this. None of these calls are seen as possibly dead so go ahead and
5013 // delete the instruction now.
5014 if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
5015 Instruction *I = tryOptimizeCall(CI);
5016 // If we changed something return the result, etc. Otherwise let
5017 // the fallthrough check.
5018 if (I) return eraseInstFromFunction(*I);
5019 }
5020
// A call with a `returned` argument can have its uses replaced by that
// argument (cast to the call's type) when the cast is lossless.
5021 if (!Call.use_empty() && !Call.isMustTailCall())
5022 if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
5023 Type *CallTy = Call.getType();
5024 Type *RetArgTy = ReturnedArg->getType();
5025 if (RetArgTy->canLosslesslyBitCastTo(CallTy))
5026 return replaceInstUsesWith(
5027 Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
5028 }
5029
5030 // Drop unnecessary callee_type metadata from calls that were converted
5031 // into direct calls.
5032 if (Call.getMetadata(LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
5033 Call.setMetadata(LLVMContext::MD_callee_type, nullptr);
5034 Changed = true;
5035 }
5036
5037 // Drop unnecessary kcfi operand bundles from calls that were converted
5038 // into direct calls.
5040 if (Bundle && !Call.isIndirectCall()) {
// Debug-only diagnostic: report when the callee's kcfi type metadata differs
// from the type the bundle expected.
5041 DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
5042 if (CalleeF) {
5043 ConstantInt *FunctionType = nullptr;
5044 ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
5045
5046 if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
5047 FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
5048
5049 if (FunctionType &&
5050 FunctionType->getZExtValue() != ExpectedType->getZExtValue())
5051 dbgs() << Call.getModule()->getName()
5052 << ": warning: kcfi: " << Call.getCaller()->getName()
5053 << ": call to " << CalleeF->getName()
5054 << " using a mismatching function pointer type\n";
5055 }
5056 });
5057
5059 }
5060
5061 if (isRemovableAlloc(&Call, &TLI))
5062 return visitAllocSite(Call);
5063
5064 // Handle intrinsics which can be used in both call and invoke context.
5065 switch (Call.getIntrinsicID()) {
5066 case Intrinsic::experimental_gc_statepoint: {
5067 GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
// Base and derived pointers still referenced by a live gc.relocate.
5068 SmallPtrSet<Value *, 32> LiveGcValues;
5069 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
5070 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
5071
5072 // Remove the relocation if unused.
5073 if (GCR.use_empty()) {
5075 continue;
5076 }
5077
5078 Value *DerivedPtr = GCR.getDerivedPtr();
5079 Value *BasePtr = GCR.getBasePtr();
5080
5081 // Undef is undef, even after relocation.
5082 if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
5085 continue;
5086 }
5087
5088 if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
5089 // The relocation of null will be null for most any collector.
5090 // TODO: provide a hook for this in GCStrategy. There might be some
5091 // weird collector this property does not hold for.
5092 if (isa<ConstantPointerNull>(DerivedPtr)) {
5093 // Use null-pointer of gc_relocate's type to replace it.
5096 continue;
5097 }
5098
5099 // isKnownNonNull -> nonnull attribute
5100 if (!GCR.hasRetAttr(Attribute::NonNull) &&
5101 isKnownNonZero(DerivedPtr,
5102 getSimplifyQuery().getWithInstruction(&Call))) {
5103 GCR.addRetAttr(Attribute::NonNull);
5104 // We discovered new fact, re-check users.
5105 Worklist.pushUsersToWorkList(GCR);
5106 }
5107 }
5108
5109 // If we have two copies of the same pointer in the statepoint argument
5110 // list, canonicalize to one. This may let us common gc.relocates.
5111 if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
5112 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
5113 auto *OpIntTy = GCR.getOperand(2)->getType();
5114 GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
5115 }
5116
5117 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
5118 // Canonicalize on the type from the uses to the defs
5119
5120 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
5121 LiveGcValues.insert(BasePtr);
5122 LiveGcValues.insert(DerivedPtr);
5123 }
5124 std::optional<OperandBundleUse> Bundle =
5126 unsigned NumOfGCLives = LiveGcValues.size();
5127 if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
5128 break;
5129 // We can reduce the size of gc live bundle.
// Val2Idx maps each bundle input to its index in the shrunken bundle;
// NumOfGCLives serves as the "dropped" marker for values no longer live.
5130 DenseMap<Value *, unsigned> Val2Idx;
5131 std::vector<Value *> NewLiveGc;
5132 for (Value *V : Bundle->Inputs) {
5133 auto [It, Inserted] = Val2Idx.try_emplace(V);
5134 if (!Inserted)
5135 continue;
5136 if (LiveGcValues.count(V)) {
5137 It->second = NewLiveGc.size();
5138 NewLiveGc.push_back(V);
5139 } else
5140 It->second = NumOfGCLives;
5141 }
5142 // Update all gc.relocates
5143 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
5144 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
5145 Value *BasePtr = GCR.getBasePtr();
5146 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
5147 "Missed live gc for base pointer");
5148 auto *OpIntTy1 = GCR.getOperand(1)->getType();
5149 GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
5150 Value *DerivedPtr = GCR.getDerivedPtr();
5151 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
5152 "Missed live gc for derived pointer");
5153 auto *OpIntTy2 = GCR.getOperand(2)->getType();
5154 GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
5155 }
5156 // Create new statepoint instruction.
5157 OperandBundleDef NewBundle("gc-live", std::move(NewLiveGc));
5158 return CallBase::Create(&Call, NewBundle);
5159 }
5160 default: { break; }
5161 }
5162
5163 return Changed ? &Call : nullptr;
5164 }
5165
// Returns true when the call was rewritten into a direct call (the original
// is erased via eraseInstFromFunction) and false when any of the legality
// checks below rejects the transform.
// NOTE(review): this extract is missing listing lines 5171 (rest of the
// Callee initializer), 5175 (start of the assert about CallBr), 5203
// (presumably the declaration of Caller), 5303 (presumably the declaration of
// ArgAttrs), 5372 (presumably the declaration of OpBundles), 5385, 5396
// (where the return-value cast NC/NV is presumably created), 5409 and 5413.
5166 /// If the callee is a constexpr cast of a function, attempt to move the cast to
5167 /// the arguments of the call/invoke.
5168 /// CallBrInst is not supported.
5169 bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
5170 auto *Callee =
5172 if (!Callee)
5173 return false;
5174
5176 "CallBr's don't have a single point after a def to insert at");
5177
5178 // Don't perform the transform for declarations, which may not be fully
5179 // accurate. For example, void @foo() is commonly used as a placeholder for
5180 // unknown prototypes.
5181 if (Callee->isDeclaration())
5182 return false;
5183
5184 // If this is a call to a thunk function, don't remove the cast. Thunks are
5185 // used to transparently forward all incoming parameters and outgoing return
5186 // values, so it's important to leave the cast in place.
5187 if (Callee->hasFnAttribute("thunk"))
5188 return false;
5189
5190 // If this is a call to a naked function, the assembly might be
5191 // using an argument, or otherwise rely on the frame layout,
5192 // the function prototype will mismatch.
5193 if (Callee->hasFnAttribute(Attribute::Naked))
5194 return false;
5195
5196 // If this is a musttail call, the callee's prototype must match the caller's
5197 // prototype with the exception of pointee types. The code below doesn't
5198 // implement that, so we can't do this transform.
5199 // TODO: Do the transform if it only requires adding pointer casts.
5200 if (Call.isMustTailCall())
5201 return false;
5202
5204 const AttributeList &CallerPAL = Call.getAttributes();
5205
5206 // Okay, this is a cast from a function to a different type. Unless doing so
5207 // would cause a type conversion of one of our arguments, change this call to
5208 // be a direct call with arguments casted to the appropriate types.
5209 FunctionType *FT = Callee->getFunctionType();
5210 Type *OldRetTy = Caller->getType();
5211 Type *NewRetTy = FT->getReturnType();
5212
5213 // Check to see if we are changing the return type...
5214 if (OldRetTy != NewRetTy) {
5215
5216 if (NewRetTy->isStructTy())
5217 return false; // TODO: Handle multiple return values.
5218
// An uncastable return type is only acceptable when the result is unused.
5219 if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
5220 if (!Caller->use_empty())
5221 return false; // Cannot transform this return value.
5222 }
5223
5224 if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
5225 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5226 if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
5227 NewRetTy, CallerPAL.getRetAttrs())))
5228 return false; // Attribute not compatible with transformed value.
5229 }
5230
5231 // If the callbase is an invoke instruction, and the return value is
5232 // used by a PHI node in a successor, we cannot change the return type of
5233 // the call because there is no place to put the cast instruction (without
5234 // breaking the critical edge). Bail out in this case.
5235 if (!Caller->use_empty()) {
5236 BasicBlock *PhisNotSupportedBlock = nullptr;
5237 if (auto *II = dyn_cast<InvokeInst>(Caller))
5238 PhisNotSupportedBlock = II->getNormalDest();
5239 if (PhisNotSupportedBlock)
5240 for (User *U : Caller->users())
5241 if (PHINode *PN = dyn_cast<PHINode>(U))
5242 if (PN->getParent() == PhisNotSupportedBlock)
5243 return false;
5244 }
5245 }
5246
5247 unsigned NumActualArgs = Call.arg_size();
5248 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
5249
5250 // Prevent us turning:
5251 // declare void @takes_i32_inalloca(i32* inalloca)
5252 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
5253 //
5254 // into:
5255 // call void @takes_i32_inalloca(i32* null)
5256 //
5257 // Similarly, avoid folding away bitcasts of byval calls.
5258 if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
5259 Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
5260 return false;
5261
// First pass over the shared arguments: legality checks only, nothing is
// modified yet.
5262 auto AI = Call.arg_begin();
5263 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
5264 Type *ParamTy = FT->getParamType(i);
5265 Type *ActTy = (*AI)->getType();
5266
5267 if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
5268 return false; // Cannot transform this parameter value.
5269
5270 // Check if there are any incompatible attributes we cannot drop safely.
5271 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
5272 .overlaps(AttributeFuncs::typeIncompatible(
5273 ParamTy, CallerPAL.getParamAttrs(i),
5274 AttributeFuncs::ASK_UNSAFE_TO_DROP)))
5275 return false; // Attribute not compatible with transformed value.
5276
5277 if (Call.isInAllocaArgument(i) ||
5278 CallerPAL.hasParamAttr(i, Attribute::Preallocated))
5279 return false; // Cannot transform to and from inalloca/preallocated.
5280
5281 if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
5282 return false;
5283
5284 if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
5285 Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
5286 return false; // Cannot transform to or from byval.
5287 }
5288
5289 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
5290 !CallerPAL.isEmpty()) {
5291 // In this case we have more arguments than the new function type, but we
5292 // won't be dropping them. Check that these extra arguments have attributes
5293 // that are compatible with being a vararg call argument.
5294 unsigned SRetIdx;
5295 if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
5296 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
5297 return false;
5298 }
5299
5300 // Okay, we decided that this is a safe thing to do: go ahead and start
5301 // inserting cast instructions as necessary.
5302 SmallVector<Value *, 8> Args;
5304 Args.reserve(NumActualArgs);
5305 ArgAttrs.reserve(NumActualArgs);
5306
5307 // Get any return attributes.
5308 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5309
5310 // If the return value is not being used, the type may not be compatible
5311 // with the existing attributes. Wipe out any problematic attributes.
5312 RAttrs.remove(
5313 AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
5314
// Second pass over the shared arguments: emit casts where the types differ
// and carry over the compatible attributes.
5315 LLVMContext &Ctx = Call.getContext();
5316 AI = Call.arg_begin();
5317 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
5318 Type *ParamTy = FT->getParamType(i);
5319
5320 Value *NewArg = *AI;
5321 if ((*AI)->getType() != ParamTy)
5322 NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
5323 Args.push_back(NewArg);
5324
5325 // Add any parameter attributes except the ones incompatible with the new
5326 // type. Note that we made sure all incompatible ones are safe to drop.
5327 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
5328 ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
5329 ArgAttrs.push_back(
5330 CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
5331 }
5332
5333 // If the function takes more arguments than the call was taking, add them
5334 // now.
5335 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
5336 Args.push_back(Constant::getNullValue(FT->getParamType(i)));
5337 ArgAttrs.push_back(AttributeSet());
5338 }
5339
5340 // If we are removing arguments to the function, emit an obnoxious warning.
5341 if (FT->getNumParams() < NumActualArgs) {
5342 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
5343 if (FT->isVarArg()) {
5344 // Add all of the arguments in their promoted form to the arg list.
5345 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
5346 Type *PTy = getPromotedType((*AI)->getType());
5347 Value *NewArg = *AI;
5348 if (PTy != (*AI)->getType()) {
5349 // Must promote to pass through va_arg area!
5350 Instruction::CastOps opcode =
5351 CastInst::getCastOpcode(*AI, false, PTy, false);
5352 NewArg = Builder.CreateCast(opcode, *AI, PTy);
5353 }
5354 Args.push_back(NewArg);
5355
5356 // Add any parameter attributes.
5357 ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
5358 }
5359 }
5360 }
5361
5362 AttributeSet FnAttrs = CallerPAL.getFnAttrs();
5363
5364 if (NewRetTy->isVoidTy())
5365 Caller->setName(""); // Void type should not have a name.
5366
5367 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
5368 "missing argument attributes");
5369 AttributeList NewCallerPAL = AttributeList::get(
5370 Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
5371
5373 Call.getOperandBundlesAsDefs(OpBundles);
5374
// Build the replacement with the same instruction kind as the original;
// callbr is excluded by the assert near the top.
5375 CallBase *NewCall;
5376 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
5377 NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
5378 II->getUnwindDest(), Args, OpBundles);
5379 } else {
5380 NewCall = Builder.CreateCall(Callee, Args, OpBundles);
5381 cast<CallInst>(NewCall)->setTailCallKind(
5382 cast<CallInst>(Caller)->getTailCallKind());
5383 }
5384 NewCall->takeName(Caller);
5386 NewCall->setAttributes(NewCallerPAL);
5387
5388 // Preserve prof metadata if any.
5389 NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
5390
5391 // Insert a cast of the return type as necessary.
5392 Instruction *NC = NewCall;
5393 Value *NV = NC;
5394 if (OldRetTy != NV->getType() && !Caller->use_empty()) {
5395 assert(!NV->getType()->isVoidTy());
5397 NC->setDebugLoc(Caller->getDebugLoc());
5398
5399 auto OptInsertPt = NewCall->getInsertionPointAfterDef();
5400 assert(OptInsertPt && "No place to insert cast");
5401 InsertNewInstBefore(NC, *OptInsertPt);
5402 Worklist.pushUsersToWorkList(*Caller);
5403 }
5404
5405 if (!Caller->use_empty())
5406 replaceInstUsesWith(*Caller, NV);
5407 else if (Caller->hasValueHandle()) {
5408 if (OldRetTy == NV->getType())
5410 else
5411 // We cannot call ValueIsRAUWd with a different type, and the
5412 // actual tracked value will disappear.
5414 }
5415
5416 eraseInstFromFunction(*Caller);
5417 return true;
5418 }
5419
5420 /// Turn a call to a function created by init_trampoline / adjust_trampoline
5421 /// intrinsic pair into a direct call to the underlying function.
///
/// Outcomes visible in this body:
///  - returns nullptr, leaving Call untouched, when Call already carries a
///    'nest' attribute anywhere in its attribute list;
///  - returns a newly created call / invoke / callbr when the target function
///    has a parameter marked 'nest'. No insertion position is passed to the
///    Create functions here, so presumably the caller inserts the result and
///    replaces Call with it -- confirm against the call site;
///  - otherwise retargets Call in place via setCalledFunction and returns
///    &Call.
///
/// \param Call  the call site that currently goes through the trampoline.
/// \param Tramp the trampoline intrinsic call; its operand 2 is used as the
///              chain ('nest') value.
// NOTE(review): the embedded listing numbers skip 5422, 5433 and 5527, and
// NestF / OpBundles are used below without a visible declaration. The
// return-type line and those two declarations appear to have been lost when
// this listing was extracted; restore them from the upstream file.
5423 InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
5424 IntrinsicInst &Tramp) {
5425 FunctionType *FTy = Call.getFunctionType();
5426 AttributeList Attrs = Call.getAttributes();
5427
5428 // If the call already has the 'nest' attribute somewhere then give up -
5429 // otherwise 'nest' would occur twice after splicing in the chain.
5430 if (Attrs.hasAttrSomewhere(Attribute::Nest))
5431 return nullptr;
5432
5434 FunctionType *NestFTy = NestF->getFunctionType();
5435
5436 AttributeList NestAttrs = NestF->getAttributes();
// A target with an empty attribute list cannot have a 'nest' parameter, so
// the search below is skipped entirely in that case.
5437 if (!NestAttrs.isEmpty()) {
// NestArgNo ends up as the index of the 'nest' parameter in NestF's own
// signature; NestTy stays null if no parameter carries the attribute.
5438 unsigned NestArgNo = 0;
5439 Type *NestTy = nullptr;
5440 AttributeSet NestAttr;
5441
5442 // Look for a parameter marked with the 'nest' attribute.
5443 for (FunctionType::param_iterator I = NestFTy->param_begin(),
5444 E = NestFTy->param_end();
5445 I != E; ++NestArgNo, ++I) {
5446 AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
5447 if (AS.hasAttribute(Attribute::Nest)) {
5448 // Record the parameter type and any other attributes.
5449 NestTy = *I;
5450 NestAttr = AS;
5451 break;
5452 }
5453 }
5454
5455 if (NestTy) {
5456 std::vector<Value*> NewArgs;
5457 std::vector<AttributeSet> NewArgAttrs;
5458 NewArgs.reserve(Call.arg_size() + 1);
// NOTE(review): one AttributeSet is pushed per entry of NewArgs (including
// the chain argument), so this reserve is one element short of the one
// above. Harmless for correctness; it can only cost a reallocation.
5459 NewArgAttrs.reserve(Call.arg_size());
5460
5461 // Insert the nest argument into the call argument list, which may
5462 // mean appending it. Likewise for attributes.
5463
5464 {
5465 unsigned ArgNo = 0;
5466 auto I = Call.arg_begin(), E = Call.arg_end();
// The position test runs before the end-of-list test on purpose: when
// NestArgNo equals the number of call arguments, the chain value is still
// appended on the final iteration before the loop exits.
5467 do {
5468 if (ArgNo == NestArgNo) {
5469 // Add the chain argument and attributes.
5470 Value *NestVal = Tramp.getArgOperand(2);
5471 if (NestVal->getType() != NestTy)
5472 NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
5473 NewArgs.push_back(NestVal);
5474 NewArgAttrs.push_back(NestAttr);
5475 }
5476
5477 if (I == E)
5478 break;
5479
5480 // Add the original argument and attributes.
5481 NewArgs.push_back(*I);
5482 NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
5483
5484 ++ArgNo;
5485 ++I;
5486 } while (true);
5487 }
5488
5489 // The trampoline may have been bitcast to a bogus type (FTy).
5490 // Handle this by synthesizing a new function type, equal to FTy
5491 // with the chain parameter inserted.
5492
5493 std::vector<Type*> NewTypes;
5494 NewTypes.reserve(FTy->getNumParams()+1);
5495
5496 // Insert the chain's type into the list of parameter types, which may
5497 // mean appending it.
5498 {
5499 unsigned ArgNo = 0;
5500 FunctionType::param_iterator I = FTy->param_begin(),
5501 E = FTy->param_end();
5502
// Same splice as for the arguments above, but over the fixed parameter
// types of the call's function type (FTy) rather than the actual operands.
5503 do {
5504 if (ArgNo == NestArgNo)
5505 // Add the chain's type.
5506 NewTypes.push_back(NestTy);
5507
5508 if (I == E)
5509 break;
5510
5511 // Add the original type.
5512 NewTypes.push_back(*I);
5513
5514 ++ArgNo;
5515 ++I;
5516 } while (true);
5517 }
5518
5519 // Replace the trampoline call with a direct call. Let the generic
5520 // code sort out any function type mismatches.
// The new type keeps FTy's return type and vararg-ness; function and return
// attributes are carried over from the original call unchanged.
5521 FunctionType *NewFTy =
5522 FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
5523 AttributeList NewPAL =
5524 AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
5525 Attrs.getRetAttrs(), NewArgAttrs);
5526
5528 Call.getOperandBundlesAsDefs(OpBundles);
5529
// Rebuild the call with the same instruction kind as the original. Every
// branch copies the calling convention and installs NewPAL; only the plain
// CallInst branch also copies the tail-call kind.
5530 Instruction *NewCaller;
5531 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
5532 NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
5533 II->getUnwindDest(), NewArgs, OpBundles);
5534 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
5535 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
5536 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
5537 NewCaller =
5538 CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
5539 CBI->getIndirectDests(), NewArgs, OpBundles);
5540 cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
5541 cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
5542 } else {
5543 NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
5544 cast<CallInst>(NewCaller)->setTailCallKind(
5545 cast<CallInst>(Call).getTailCallKind());
5546 cast<CallInst>(NewCaller)->setCallingConv(
5547 cast<CallInst>(Call).getCallingConv());
5548 cast<CallInst>(NewCaller)->setAttributes(NewPAL);
5549 }
5550 NewCaller->setDebugLoc(Call.getDebugLoc());
5551
5552 return NewCaller;
5553 }
5554 }
5555
5556 // Replace the trampoline call with a direct call. Since there is no 'nest'
5557 // parameter, there is no need to adjust the argument list. Let the generic
5558 // code sort out any function type mismatches.
5559 Call.setCalledFunction(FTy, NestF);
5560 return &Call;
5561}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
@ Scaled
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
BitTracker BT
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
#define Check(C,...)
#define DEBUG_TYPE
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass through a va_arg area.
static Instruction * createOverflowTuple(IntrinsicInst *II, Value *Result, Constant *Overflow)
Creates a result tuple for an overflow intrinsic II with a given Result and a constant Overflow value...
static void referenceAspect(StringRef Aspect, StringRef ImplName, Module *M, IRBuilderBase &B)
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
static bool removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, std::function< bool(const IntrinsicInst &)> IsStart)
static bool inputDenormalIsDAZ(const Function &F, const Type *Ty)
static Instruction * reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If this min/max has a matching min/max operand with a constant, try to push the constant operand into...
static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID)
Helper to match idempotent binary intrinsics, namely, intrinsics where f(f(x, y), y) == f(x,...
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, const SimplifyQuery &SQ)
Return true if two values Op0 and Op1 are known to have the same sign.
static Value * optimizeModularFormat(CallInst *CI, IRBuilderBase &B)
static Instruction * moveAddAfterMinMax(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0.
static Instruction * simplifyInvariantGroupIntrinsic(IntrinsicInst &II, InstCombinerImpl &IC)
This function transforms launder.invariant.group and strip.invariant.group like: launder(launder(x)) ...
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
static std::optional< bool > getKnownSign(Value *Op, const SimplifyQuery &SQ)
static cl::opt< unsigned > GuardWideningWindow("instcombine-guard-widening-window", cl::init(3), cl::desc("How wide an instruction window to bypass looking for " "another guard"))
static bool hasUndefSource(AnyMemTransferInst *MI)
Recognize a memcpy/memmove from a trivially otherwise unused alloca.
static Instruction * factorizeMinMaxTree(IntrinsicInst *II)
Reduce a sequence of min/max intrinsics with a common operand.
static Instruction * foldClampRangeOfTwo(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If we have a clamp pattern like max (min X, 42), 41 – where the output can only be one of two possibl...
static Value * simplifyReductionOperand(Value *Arg, bool CanReorderLanes)
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
static bool isAspectNeeded(StringRef Aspect, CallInst *CI, std::optional< unsigned > FirstArgIdx, const std::optional< Bitset< 256 > > &Specifiers)
static Value * foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
static std::optional< bool > getKnownSignOrZero(Value *Op, const SimplifyQuery &SQ)
static Value * foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, const DataLayout &DL, InstCombiner::BuilderTy &Builder)
Fold an unsigned minimum of trailing or leading zero bits counts: umin(cttz(CtOp1,...
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "(X ROp Y) LOp Z" is always equal to "(X LOp Z) ROp (Y LOp Z)".
static Value * foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC, IntrinsicInst *II)
Attempt to simplify value-accumulating recurrences of kind: umax.acc = phi i8 [ umax,...
static bool ldexpSaturatingAddIsSafe(Type *FpTy, Type *ExpTy)
static Instruction * foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC)
static Instruction * simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC, bool IsExtension)
Convert tbl/tbx intrinsics to shufflevector if the mask is constant, and at most two source operands ...
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC)
static IntrinsicInst * findInitTrampoline(Value *Callee)
static Bitset< 256 > parseFormatStringSpecifiers(StringRef FormatStr)
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, const Function &F, Type *Ty)
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static Value * reassociateMinMaxWithConstants(IntrinsicInst *II, IRBuilderBase &Builder, const SimplifyQuery &SQ)
If this min/max has a constant operand and an operand that is a matching min/max with a constant oper...
static CallInst * canonicalizeConstantArg0ToArg1(CallInst &Call)
static Instruction * foldNeonShift(IntrinsicInst *II, InstCombinerImpl &IC)
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
static bool hasNoSignedWrap(BinaryOperator &I)
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
static bool inputDenormalIsIEEE(DenormalMode Mode)
Return true if it's possible to assume IEEE treatment of input denormals in F for Val.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const Function * getCalledFunction(const Value *V)
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file implements the SmallBitVector class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:262
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:345
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:275
bool isNegative() const
Definition APFloat.h:1544
void clearSign()
Definition APFloat.h:1363
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1153
bool isZero() const
Definition APFloat.h:1540
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1203
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1213
bool isInfinity() const
Definition APFloat.h:1541
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1983
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1692
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1963
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1970
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1084
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition APInt.cpp:2071
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1597
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1976
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
Definition APSInt.h:310
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
Definition APSInt.h:302
This class represents any memset intrinsic.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:194
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
static LLVM_ABI Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI bool isSigned() const
Whether the intrinsic is signed or unsigned.
LLVM_ABI Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
static BinaryOperator * CreateFAddFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:271
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNSW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:314
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:329
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:279
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:283
static BinaryOperator * CreateFSubFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:275
static LLVM_ABI BinaryOperator * CreateNSWNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
This is a constexpr reimplementation of a subset of std::bitset.
Definition Bitset.h:30
constexpr bool any() const
Definition Bitset.h:113
constexpr Bitset & set()
Definition Bitset.h:81
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
void setDoesNotThrow()
MaybeAlign getRetAlign() const
Extract the alignment of the return value.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
OperandBundleUse getOperandBundleAt(unsigned Index) const
Return the operand bundle at a specific index.
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
bool hasRetAttr(Attribute::AttrKind Kind) const
Determine whether the return value has the given attribute.
unsigned getNumOperandBundles() const
Return the number of operand bundles associated with this User.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
static LLVM_ABI CallBase * removeOperandBundleAt(CallBase *CB, size_t Offset, InsertPosition InsertPtr=nullptr)
void setNotConvergent()
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
bool doesNotThrow() const
Determine if the call cannot unwind.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
bool isConvergent() const
Determine if the invoke is convergent.
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
Value * getReturnedArgOperand() const
If one of the arguments has the 'returned' attribute, returns its operand value.
static LLVM_ABI CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, InsertPosition InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
static CallBrInst * Create(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef< BasicBlock * > IndirectDests, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This class represents a function call, abstracting a target machine's calling convention.
bool isNoTailCall() const
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
bool isMustTailCall() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:743
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:746
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:744
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:745
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:748
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:751
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:747
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:756
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:934
Predicate getUnorderedPredicate() const
Definition InstrTypes.h:874
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI ConstantPtrAuth * get(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, Constant *AddrDisc, Constant *DeactivationSymbol)
Return a pointer signed with the specified parameters.
This class represents a range of values.
LLVM_ABI ConstantRange zextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const
Does the predicate Pred hold between ranges this and Other?
LLVM_ABI ConstantRange multiply(const ConstantRange &Other, unsigned NoWrapKind=0) const
Return a new range representing the possible values resulting from a multiplication of a value in thi...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
unsigned size() const
Definition DenseMap.h:174
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:221
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:216
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static FMFSource intersect(Value *A, Value *B)
Intersect the FMF from two instructions.
Definition IRBuilder.h:107
This class represents an extension of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
bool allowReassoc() const
Flag queries.
Definition FMF.h:64
An instruction for ordering other memory operations.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this fence instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this fence instruction.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type::subtype_iterator param_iterator
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool isConvergent() const
Determine if the call is convergent.
Definition Function.h:618
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:602
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
Definition Function.h:251
LLVM_ABI Value * getBasePtr() const
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
LLVM_ABI Value * getDerivedPtr() const
unsigned getDerivedPtrIndex() const
The index into the associated statepoint's argument list which contains the pointer whose relocation t...
std::vector< const GCRelocateInst * > getGCRelocates() const
Get list of all gc relocates linked to this statepoint. May contain several relocations for the same b...
Definition Statepoint.h:206
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this GlobalObject.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:337
PointerType * getType() const
Global values are always pointers.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI Value * CreateLaunderInvariantGroup(Value *Ptr)
Create a launder.invariant.group intrinsic call.
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:509
LLVM_ABI Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1491
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2162
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2691
LLVM_ABI Value * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={}, function_ref< void(CallInst *)> SetFn=[](CallInst *) {})
Variant to create a possibly constant-folded intrinsic.
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:514
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2526
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2289
LLVM_ABI Value * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *Op, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
LLVM_ABI Value * CreateStripInvariantGroup(Value *Ptr)
Create a strip.invariant.group intrinsic call.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * SimplifyAnyMemSet(AnyMemSetInst *MI)
Instruction * foldItoFPtoI(FPToIntTy &FI)
fpto{s/u}i.sat --> X or zext(X) or sext(X) or trunc(X). This is safe if the intermediate type has enou...
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitCallBrInst(CallBrInst &CBI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Value * foldReversedIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are reverses, try to pull the reverse after the intrinsic.
Value * tryGetLog2(Value *Op, bool AssumeNonZero)
Instruction * visitFenceInst(FenceInst &FI)
Instruction * foldShuffledIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are unary shuffles with the same mask, try to shuffle after the int...
Instruction * visitInvokeInst(InvokeInst &II)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Instruction * visitVAEndInst(VAEndInst &I)
Instruction * matchBSwapOrBitReverse(Instruction &I, bool MatchBSwaps, bool MatchBitReversals)
Given an initial instruction, check to see if it is the root of a bswap/bitreverse idiom.
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * SimplifyAnyMemTransfer(AnyMemTransferInst *MI)
OverflowResult computeOverflow(Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS, Instruction *CxtI) const
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
DominatorTree & getDominatorTree() const
BlockFrequencyInfo * BFI
TargetLibraryInfo & TLI
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
const DataLayout & DL
DomConditionCache DC
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
LLVM_ABI std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
ProfileSummaryInfo * PSI
OptimizationRemarkEmitter & ORE
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
const SimplifyQuery & getSimplifyQuery() const
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI std::optional< InstListType::iterator > getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Metadata node.
Definition Metadata.h:1069
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1561
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
bool isSigned() const
Whether the intrinsic is signed or unsigned.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
StringRef getName() const
Get a short "name" for the module.
Definition Module.h:271
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition Operator.h:43
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:113
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:107
bool isCommutative() const
Return true if the instruction is commutative.
Definition Operator.h:130
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Represents a saturating add/sub intrinsic.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
static constexpr size_t npos
Definition StringRef.h:58
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
LLVM_ABI size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
Class to represent struct types.
static LLVM_ABI bool isCallingConvCCompatible(CallBase *CI)
Returns true if call site / callee has cdecl-compatible calling conventions.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI bool canLosslesslyBitCastTo(Type *Ty) const
Return true if this type could be converted with a lossless BitCast to type 'Ty'.
Definition Type.cpp:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:276
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:148
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:156
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:36
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
This represents the llvm.va_end intrinsic.
static LLVM_ABI void ValueIsDeleted(Value *V)
Definition Value.cpp:1263
static LLVM_ABI void ValueIsRAUWd(Value *Old, Value *New)
Definition Value.cpp:1316
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
static constexpr uint64_t MaximumAlignment
Definition Value.h:799
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:346
static constexpr unsigned MaxAlignmentExponent
The maximum alignment for instructions.
Definition Value.h:798
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:400
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrAdd_match< PointerOpTy, OffsetOpTy > m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp)
Matches GEP with i8 source element type.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
OverflowingBinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWNeg(const ValTy &V)
Matches a 'Neg' as 'sub nsw 0, V'.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cstfp_pred_ty< is_neg_zero_fp > m_NegZeroFP()
Match a floating-point negative zero.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
specific_fpval m_SpecificFP(double V)
Match a specific floating point value or vector with all elements equal to the value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
auto m_Constant()
Match an arbitrary Constant and ignore it.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_strictlypositive > m_StrictlyPositive()
Match an integer or vector of strictly positive values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match an integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
cst_pred_ty< custom_checkfn< APInt > > m_CheckedInt(function_ref< bool(const APInt &)> CheckFn)
Match an integer or vector where CheckFn(ele) for each element is true.
auto m_MaxOrMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
auto m_c_MaxOrMin(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWSub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Exact_match< T > m_Exact(const T &SubPattern)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
auto m_UnOp()
Match an arbitrary unary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_CopySign(const Opnd0 &Op0, const Opnd1 &Op1)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:204
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
constexpr double e
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
@ NeverOverflows
Never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
LLVM_ABI Value * simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for an FMul, fold the result or return null.
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
BundleAttr getBundleAttrFromOBU(OperandBundleUse OBU)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
LLVM_ABI bool getConstantStringInfo(const Value *V, StringRef &Str, bool TrimAtNul=true)
This function computes the length of a null-terminated C string pointed to by V.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI AssumeSeparateStorageInfo getAssumeSeparateStorageInfo(OperandBundleUse)
LLVM_ABI Value * getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI)
Gets the alignment argument for an aligned_alloc-like function, using either built-in knowledge based...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1746
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:253
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1701
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_NABS
Negated absolute value.
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_READONLY APFloat minimumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimumNumber semantics.
Definition APFloat.h:1732
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1646
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI bool matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I, PHINode *&P, Value *&Init, Value *&OtherOp)
Attempt to match a simple value-accumulating recurrence of the form: llvm.intrinsic....
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1776
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isNotCrossLaneOperation(const Instruction *I)
Return true if the instruction doesn't potentially cross vector lanes.
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
LLVM_ABI Value * simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for the multiplication of a FMA, fold the result or return null.
@ Other
Any other memory.
Definition ModRef.h:68
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
LLVM_ABI Value * simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q)
Given a constrained FP intrinsic call, tries to compute its simplified version.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1682
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI AssumeNonNullInfo getAssumeNonNullInfo(OperandBundleUse)
@ Add
Sum of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI ConstantRange computeConstantRangeIncludingKnownBits(const WithCache< const Value * > &V, bool ForSigned, const SimplifyQuery &SQ)
Combine constant ranges from computeConstantRange() and computeKnownBits().
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Don't use information from its non-constant operands.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
constexpr unsigned BitWidth
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
LLVM_ABI std::optional< APInt > getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref< const Value *(const Value *)> Mapper=[](const Value *V) { return V;})
Return the size of the requested allocation.
LLVM_ABI AssumeAlignInfo getAssumeAlignInfo(OperandBundleUse)
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if any of the elements of this predicate mask are known to be ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const SimplifyQuery &Q, bool IgnoreFree=false)
Equivalent to isDereferenceableAndAlignedPointer with an alignment of 1.
Definition Loads.cpp:264
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1719
LLVM_ABI bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW=false, bool AllowPoison=true)
Return true if the two given values are negations of each other.
LLVM_READONLY APFloat maximumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximumNumber semantics.
Definition APFloat.h:1759
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI AssumeDereferenceableInfo getAssumeDereferenceableInfo(OperandBundleUse)
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
LLVM_ABI AssumeNoUndefInfo getAssumeNoUndefInfo(OperandBundleUse)
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI std::optional< bool > computeKnownFPSignBit(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return false if we can prove that the specified FP value's sign bit is 0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define NC
Definition regutils.h:42
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ IEEE
IEEE-754 denormal numbers preserved.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:288
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:303
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
bool isNonZero() const
Returns true if this value is known to be non-zero.
Definition KnownBits.h:109
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:294
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:300
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
Matching combinators.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
ArrayRef< Use > Inputs
SelectPatternFlavor Flavor
const DataLayout & DL
const Instruction * CxtI
SimplifyQuery getWithInstruction(const Instruction *I) const