InstCombineCalls.cpp
1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/Statistic.h"
27#include "llvm/Analysis/Loads.h"
32#include "llvm/IR/Attributes.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/DebugInfo.h"
39#include "llvm/IR/Function.h"
41#include "llvm/IR/InlineAsm.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsHexagon.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/Metadata.h"
54#include "llvm/IR/Statepoint.h"
55#include "llvm/IR/Type.h"
56#include "llvm/IR/User.h"
57#include "llvm/IR/Value.h"
58#include "llvm/IR/ValueHandle.h"
63#include "llvm/Support/Debug.h"
74#include <algorithm>
75#include <cassert>
76#include <cstdint>
77#include <optional>
78#include <utility>
79#include <vector>
80
81#define DEBUG_TYPE "instcombine"
83
84using namespace llvm;
85using namespace PatternMatch;
86
87STATISTIC(NumSimplified, "Number of library calls simplified");
88
89static cl::opt<unsigned> GuardWideningWindow(
90 "instcombine-guard-widening-window",
91 cl::init(3),
92 cl::desc("How wide an instruction window to bypass looking for "
93 "another guard"));
94
95/// Return the specified type promoted as it would be to pass through a va_arg
96/// area.
97static Type *getPromotedType(Type *Ty) {
98 if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
99 if (ITy->getBitWidth() < 32)
100 return Type::getInt32Ty(Ty->getContext());
101 }
102 return Ty;
103}
104
105/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
106/// TODO: This should probably be integrated with visitAllocSites, but that
107/// requires a deeper change to allow either unread or unwritten objects.
108static bool hasUndefSource(AnyMemTransferInst *MI) {
109 auto *Src = MI->getRawSource();
110 while (isa<GetElementPtrInst>(Src)) {
111 if (!Src->hasOneUse())
112 return false;
113 Src = cast<Instruction>(Src)->getOperand(0);
114 }
115 return isa<AllocaInst>(Src) && Src->hasOneUse();
116}
117
118Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
119 Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
120 MaybeAlign CopyDstAlign = MI->getDestAlign();
121 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
122 MI->setDestAlignment(DstAlign);
123 return MI;
124 }
125
126 Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
127 MaybeAlign CopySrcAlign = MI->getSourceAlign();
128 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
129 MI->setSourceAlignment(SrcAlign);
130 return MI;
131 }
132
133 // If we have a store to a location which is known constant, we can conclude
134 // that the store must be storing the constant value (else the memory
135 // wouldn't be constant), and this must be a noop.
136 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
137 // Set the size of the copy to 0, it will be deleted on the next iteration.
138 MI->setLength((uint64_t)0);
139 return MI;
140 }
141
142 // If the source is provably undef, the memcpy/memmove doesn't do anything
143 // (unless the transfer is volatile).
144 if (hasUndefSource(MI) && !MI->isVolatile()) {
145 // Set the size of the copy to 0, it will be deleted on the next iteration.
146 MI->setLength((uint64_t)0);
147 return MI;
148 }
149
150 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
151 // load/store.
152 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
153 if (!MemOpLength) return nullptr;
154
155 // Source and destination pointer types are always "i8*" for intrinsic. See
156 // if the size is something we can handle with a single primitive load/store.
157 // A single load+store correctly handles overlapping memory in the memmove
158 // case.
159 uint64_t Size = MemOpLength->getLimitedValue();
160 assert(Size && "0-sized memory transferring should be removed already.");
161
162 if (Size > 8 || (Size&(Size-1)))
163 return nullptr; // If not 1/2/4/8 bytes, exit.
164
165 // If it is an atomic transfer and the alignment is less than the size, we
166 // would introduce an unaligned memory access, which CodeGen would later
167 // turn into a libcall. That is not an evident performance gain, so disable
168 // the fold for now.
169 if (MI->isAtomic())
170 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
171 return nullptr;
172
173 // Use an integer load+store unless we can find something better.
174 IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
175
176 // If the memcpy has metadata describing the members, see if we can get the
177 // TBAA, scope and noalias tags describing our copy.
178 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
179
180 Value *Src = MI->getArgOperand(1);
181 Value *Dest = MI->getArgOperand(0);
182 LoadInst *L = Builder.CreateLoad(IntType, Src);
183 // Alignment from the mem intrinsic will be better, so use it.
184 L->setAlignment(*CopySrcAlign);
185 L->setAAMetadata(AACopyMD);
186 MDNode *LoopMemParallelMD =
187 MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
188 if (LoopMemParallelMD)
189 L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
190 MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
191 if (AccessGroupMD)
192 L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
193
194 StoreInst *S = Builder.CreateStore(L, Dest);
195 // Alignment from the mem intrinsic will be better, so use it.
196 S->setAlignment(*CopyDstAlign);
197 S->setAAMetadata(AACopyMD);
198 if (LoopMemParallelMD)
199 S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
200 if (AccessGroupMD)
201 S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
202 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
203
204 if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
205 // non-atomics can be volatile
206 L->setVolatile(MT->isVolatile());
207 S->setVolatile(MT->isVolatile());
208 }
209 if (MI->isAtomic()) {
210 // atomics have to be unordered
211 L->setOrdering(AtomicOrdering::Unordered);
212 S->setOrdering(AtomicOrdering::Unordered);
213 }
214
215 // Set the size of the copy to 0, it will be deleted on the next iteration.
216 MI->setLength((uint64_t)0);
217 return MI;
218}
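// Illustrative sketch of the constant-length rewrite above (an assumed
// non-atomic, 8-byte copy with known 8-byte alignment; metadata handling
// follows the code):
//   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %d, ptr align 8 %s, i64 8, i1 false)
// becomes
//   %v = load i64, ptr %s, align 8
//   store i64 %v, ptr %d, align 8
// and the memcpy's length is set to 0 so it is erased on the next iteration.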
219
220Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
221 const Align KnownAlignment =
222 getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
223 MaybeAlign MemSetAlign = MI->getDestAlign();
224 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
225 MI->setDestAlignment(KnownAlignment);
226 return MI;
227 }
228
229 // If we have a store to a location which is known constant, we can conclude
230 // that the store must be storing the constant value (else the memory
231 // wouldn't be constant), and this must be a noop.
232 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
233 // Set the size of the copy to 0, it will be deleted on the next iteration.
234 MI->setLength((uint64_t)0);
235 return MI;
236 }
237
238 // Remove memset with an undef value.
239 // FIXME: This is technically incorrect because it might overwrite a poison
240 // value. Change to PoisonValue once #52930 is resolved.
241 if (isa<UndefValue>(MI->getValue())) {
242 // Set the size of the copy to 0, it will be deleted on the next iteration.
243 MI->setLength((uint64_t)0);
244 return MI;
245 }
246
247 // Extract the length and alignment and fill if they are constant.
248 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
249 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
250 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
251 return nullptr;
252 const uint64_t Len = LenC->getLimitedValue();
253 assert(Len && "0-sized memory setting should be removed already.");
254 const Align Alignment = MI->getDestAlign().valueOrOne();
255
256 // If it is an atomic memset and the alignment is less than the size, we
257 // would introduce an unaligned memory access, which CodeGen would later
258 // turn into a libcall. That is not an evident performance gain, so disable
259 // the fold for now.
260 if (MI->isAtomic() && Alignment < Len)
261 return nullptr;
262
263 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
264 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
265 Value *Dest = MI->getDest();
266
267 // Extract the fill value and store.
268 Constant *FillVal = ConstantInt::get(
269 MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
270 StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
271 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
272 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(S)) {
273 if (llvm::is_contained(DbgAssign->location_ops(), FillC))
274 DbgAssign->replaceVariableLocationOp(FillC, FillVal);
275 }
276
277 S->setAlignment(Alignment);
278 if (MI->isAtomic())
279 S->setOrdering(AtomicOrdering::Unordered);
280
281 // Set the size of the copy to 0, it will be deleted on the next iteration.
282 MI->setLength((uint64_t)0);
283 return MI;
284 }
285
286 return nullptr;
287}
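// Illustrative sketch of the small constant-length memset rewrite above (an
// assumed 4-byte, 4-aligned, non-volatile memset of the byte 0x41):
//   call void @llvm.memset.p0.i64(ptr align 4 %p, i8 65, i64 4, i1 false)
// becomes
//   store i32 1094795585, ptr %p, align 4   ; 0x41414141, APInt::getSplat of 0x41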
288
289// TODO, Obvious Missing Transforms:
290// * Narrow width by halves excluding zero/undef lanes
291Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
292 Value *LoadPtr = II.getArgOperand(0);
293 const Align Alignment = II.getParamAlign(0).valueOrOne();
294
295 // If the mask is all ones or undefs, this is a plain vector load of the 1st
296 // argument.
297 if (maskIsAllOneOrUndef(II.getArgOperand(1))) {
298 LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
299 "unmaskedload");
300 L->copyMetadata(II);
301 return L;
302 }
303
304 // If we can unconditionally load from this address, replace with a
305 // load/select idiom. TODO: use DT for context sensitive query
306 if (isDereferenceablePointer(LoadPtr, II.getType(),
307 II.getDataLayout(), &II, &AC)) {
308 LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
309 "unmaskedload");
310 LI->copyMetadata(II);
311 return Builder.CreateSelect(II.getArgOperand(1), LI, II.getArgOperand(2));
312 }
313
314 return nullptr;
315}
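// Schematic sketch of the masked-load folds above (operand shapes assumed;
// the alignment is taken from the pointer parameter attribute):
//   llvm.masked.load(%p, all-ones mask, %passthru)
//     -->  load <4 x i32>, ptr %p
//   llvm.masked.load(%p, %mask, %passthru) with %p known dereferenceable
//     -->  %v = load <4 x i32>, ptr %p
//          select <4 x i1> %mask, <4 x i32> %v, <4 x i32> %passthru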
316
317// TODO, Obvious Missing Transforms:
318// * Single constant active lane -> store
319// * Narrow width by halves excluding zero/undef lanes
320Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
321 Value *StorePtr = II.getArgOperand(1);
322 Align Alignment = II.getParamAlign(1).valueOrOne();
323 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
324 if (!ConstMask)
325 return nullptr;
326
327 // If the mask is all zeros, this instruction does nothing.
328 if (maskIsAllZeroOrUndef(ConstMask))
329 return eraseInstFromFunction(II);
330
331 // If the mask is all ones, this is a plain vector store of the 1st argument.
332 if (maskIsAllOneOrUndef(ConstMask)) {
333 StoreInst *S =
334 new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
335 S->copyMetadata(II);
336 return S;
337 }
338
339 if (isa<ScalableVectorType>(ConstMask->getType()))
340 return nullptr;
341
342 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
343 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
344 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
345 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
346 PoisonElts))
347 return replaceOperand(II, 0, V);
348
349 return nullptr;
350}
351
352// TODO, Obvious Missing Transforms:
353// * Single constant active lane load -> load
354// * Dereferenceable address & few lanes -> scalarize speculative load/selects
355// * Adjacent vector addresses -> masked.load
356// * Narrow width by halves excluding zero/undef lanes
357// * Vector incrementing address -> vector masked load
358Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
359 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(1));
360 if (!ConstMask)
361 return nullptr;
362
363 // Vector splat address w/known mask -> scalar load
364 // Fold the gather to load the source vector first lane
365 // because it is reloading the same value each time
366 if (ConstMask->isAllOnesValue())
367 if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
368 auto *VecTy = cast<VectorType>(II.getType());
369 const Align Alignment = II.getParamAlign(0).valueOrOne();
370 LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
371 Alignment, "load.scalar");
372 Value *Shuf =
373 Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
374 return replaceInstUsesWith(II, Shuf);
375 }
376
377 return nullptr;
378}
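// Schematic sketch of the splat-address gather fold above (an assumed
// <4 x i32> gather whose pointer operand is a splat and whose mask is all
// ones):
//   gather(splat(ptr %p), all-ones mask)
//     -->  %s = load i32, ptr %p
//          broadcast %s to <4 x i32> and replace the gather with it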
379
380// TODO, Obvious Missing Transforms:
381// * Single constant active lane -> store
382// * Adjacent vector addresses -> masked.store
383// * Narrow store width by halves excluding zero/undef lanes
384// * Vector incrementing address -> vector masked store
385Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
386 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
387 if (!ConstMask)
388 return nullptr;
389
390 // If the mask is all zeros, a scatter does nothing.
391 if (maskIsAllZeroOrUndef(ConstMask))
392 return eraseInstFromFunction(II);
393
394 // Vector splat address -> scalar store
395 if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
396 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
397 if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
398 if (maskContainsAllOneOrUndef(ConstMask)) {
399 Align Alignment = II.getParamAlign(1).valueOrOne();
400 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
401 Alignment);
402 S->copyMetadata(II);
403 return S;
404 }
405 }
406 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
407 // lastlane), ptr
408 if (ConstMask->isAllOnesValue()) {
409 Align Alignment = II.getParamAlign(1).valueOrOne();
410 VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
411 ElementCount VF = WideLoadTy->getElementCount();
412 Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
413 Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
414 Value *Extract =
415 Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
416 StoreInst *S =
417 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
418 S->copyMetadata(II);
419 return S;
420 }
421 }
422 if (isa<ScalableVectorType>(ConstMask->getType()))
423 return nullptr;
424
425 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
426 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
427 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
428 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
429 PoisonElts))
430 return replaceOperand(II, 0, V);
431 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
432 PoisonElts))
433 return replaceOperand(II, 1, V);
434
435 return nullptr;
436}
437
438/// This function transforms launder.invariant.group and strip.invariant.group
439/// like:
440/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
441/// launder(strip(%x)) -> launder(%x)
442/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
443/// strip(launder(%x)) -> strip(%x)
444/// This is legal because it preserves the most recent information about
445/// the presence or absence of invariant.group.
446static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
447 InstCombinerImpl &IC) {
448 auto *Arg = II.getArgOperand(0);
449 auto *StrippedArg = Arg->stripPointerCasts();
450 auto *StrippedInvariantGroupsArg = StrippedArg;
451 while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
452 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
453 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
454 break;
455 StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
456 }
457 if (StrippedArg == StrippedInvariantGroupsArg)
458 return nullptr; // No launders/strips to remove.
459
460 Value *Result = nullptr;
461
462 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
463 Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
464 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
465 Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
466 else
467 llvm_unreachable(
468 "simplifyInvariantGroupIntrinsic only handles launder and strip");
469 if (Result->getType()->getPointerAddressSpace() !=
470 II.getType()->getPointerAddressSpace())
471 Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
472
473 return cast<Instruction>(Result);
474}
475
476static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
477 assert((II.getIntrinsicID() == Intrinsic::cttz ||
478 II.getIntrinsicID() == Intrinsic::ctlz) &&
479 "Expected cttz or ctlz intrinsic");
480 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
481 Value *Op0 = II.getArgOperand(0);
482 Value *Op1 = II.getArgOperand(1);
483 Value *X;
484 // ctlz(bitreverse(x)) -> cttz(x)
485 // cttz(bitreverse(x)) -> ctlz(x)
486 if (match(Op0, m_BitReverse(m_Value(X)))) {
487 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
488 Function *F =
489 Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
490 return CallInst::Create(F, {X, II.getArgOperand(1)});
491 }
492
493 if (II.getType()->isIntOrIntVectorTy(1)) {
494 // ctlz/cttz i1 Op0 --> not Op0
495 if (match(Op1, m_Zero()))
496 return BinaryOperator::CreateNot(Op0);
497 // If zero is poison, then the input can be assumed to be "true", so the
498 // instruction simplifies to "false".
499 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
500 return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
501 }
502
503 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
504 if (II.hasOneUse() && match(Op1, m_Zero()) &&
505 match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
506 II.dropUBImplyingAttrsAndMetadata();
507 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
508 }
509
510 Constant *C;
511
512 if (IsTZ) {
513 // cttz(-x) -> cttz(x)
514 if (match(Op0, m_Neg(m_Value(X))))
515 return IC.replaceOperand(II, 0, X);
516
517 // cttz(-x & x) -> cttz(x)
518 if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
519 return IC.replaceOperand(II, 0, X);
520
521 // cttz(sext(x)) -> cttz(zext(x))
522 if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
523 auto *Zext = IC.Builder.CreateZExt(X, II.getType());
524 auto *CttzZext =
525 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
526 return IC.replaceInstUsesWith(II, CttzZext);
527 }
528
529 // Zext doesn't change the number of trailing zeros, so narrow:
530 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
531 if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
532 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
533 IC.Builder.getTrue());
534 auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
535 return IC.replaceInstUsesWith(II, ZextCttz);
536 }
537
538 // cttz(abs(x)) -> cttz(x)
539 // cttz(nabs(x)) -> cttz(x)
540 Value *Y;
541 SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
542 if (SPF == SPF_ABS || SPF == SPF_NABS)
543 return IC.replaceOperand(II, 0, X);
544
545 if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
546 return IC.replaceOperand(II, 0, X);
547
548 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
549 if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
550 match(Op1, m_One())) {
551 Value *ConstCttz =
552 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
553 return BinaryOperator::CreateAdd(ConstCttz, X);
554 }
555
556 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
557 if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
558 match(Op1, m_One())) {
559 Value *ConstCttz =
560 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
561 return BinaryOperator::CreateSub(ConstCttz, X);
562 }
563
564 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
565 if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
566 Value *Width =
567 ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
568 return BinaryOperator::CreateSub(Width, X);
569 }
570 } else {
571 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
572 if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
573 match(Op1, m_One())) {
574 Value *ConstCtlz =
575 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
576 return BinaryOperator::CreateAdd(ConstCtlz, X);
577 }
578
579 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
580 if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
581 match(Op1, m_One())) {
582 Value *ConstCtlz =
583 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
584 return BinaryOperator::CreateSub(ConstCtlz, X);
585 }
586
587 // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
588 if (Op0->hasOneUse() &&
589 match(Op0,
590 m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
591 Type *Ty = II.getType();
592 unsigned BitWidth = Ty->getScalarSizeInBits();
593 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
594 {X, IC.Builder.getFalse()});
595 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
596 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
597 }
598 }
599
600 // cttz(Pow2) -> Log2(Pow2)
601 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
602 if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
603 if (IsTZ)
604 return IC.replaceInstUsesWith(II, R);
605 BinaryOperator *BO = BinaryOperator::CreateSub(
606 ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
607 R);
608 BO->setHasNoSignedWrap();
609 BO->setHasNoUnsignedWrap();
610 return BO;
611 }
612
613 KnownBits Known = IC.computeKnownBits(Op0, &II);
614
615 // Create a mask for bits above (ctlz) or below (cttz) the first known one.
616 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
617 : Known.countMaxLeadingZeros();
618 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
619 : Known.countMinLeadingZeros();
620
621 // If all bits above (ctlz) or below (cttz) the first known one are known
622 // zero, this value is constant.
623 // FIXME: This should be in InstSimplify because we're replacing an
624 // instruction with a constant.
625 if (PossibleZeros == DefiniteZeros) {
626 auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
627 return IC.replaceInstUsesWith(II, C);
628 }
629
630 // If the input to cttz/ctlz is known to be non-zero,
631 // then change the 'ZeroIsPoison' parameter to 'true'
632 // because we know the zero behavior can't affect the result.
633 if (!Known.One.isZero() ||
634 isKnownNonZero(Op0, IC.getSimplifyQuery().getWithInstruction(&II))) {
635 if (!match(II.getArgOperand(1), m_One()))
636 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
637 }
638
639 // Add range attribute since known bits can't completely reflect what we know.
640 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
641 if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
642 !II.getMetadata(LLVMContext::MD_range)) {
643 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
644 APInt(BitWidth, PossibleZeros + 1));
645 II.addRangeRetAttr(Range);
646 return &II;
647 }
648
649 return nullptr;
650}
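// Illustrative sketches of a few cttz/ctlz folds handled above (operand
// names are assumed):
//   ctlz(i1 %b, i1 false)          -->  xor i1 %b, true
//   cttz(sub i32 0, %x, i1 %zp)    -->  cttz(i32 %x, i1 %zp)
//   cttz(shl i32 16, %v, i1 true)  -->  add i32 cttz(i32 16, i1 true), %v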
651
652static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
653 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
654 "Expected ctpop intrinsic");
655 Type *Ty = II.getType();
656 unsigned BitWidth = Ty->getScalarSizeInBits();
657 Value *Op0 = II.getArgOperand(0);
658 Value *X, *Y;
659
660 // ctpop(bitreverse(x)) -> ctpop(x)
661 // ctpop(bswap(x)) -> ctpop(x)
662 if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
663 return IC.replaceOperand(II, 0, X);
664
665 // ctpop(rot(x)) -> ctpop(x)
666 if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
667 match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
668 X == Y)
669 return IC.replaceOperand(II, 0, X);
670
671 // ctpop(x | -x) -> bitwidth - cttz(x, false)
672 if (Op0->hasOneUse() &&
673 match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
674 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
675 {X, IC.Builder.getFalse()});
676 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
677 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
678 }
679
680 // ctpop(~x & (x - 1)) -> cttz(x, false)
681 if (match(Op0,
682 m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
683 Function *F =
684 Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
685 return CallInst::Create(F, {X, IC.Builder.getFalse()});
686 }
687
688 // Zext doesn't change the number of set bits, so narrow:
689 // ctpop (zext X) --> zext (ctpop X)
690 if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
691 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
692 return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
693 }
694
695 KnownBits Known(BitWidth);
696 IC.computeKnownBits(Op0, Known, &II);
697
698 // If all bits are zero except for exactly one fixed bit, then the result
699 // must be 0 or 1, and we can get that answer by shifting to LSB:
700 // ctpop (X & 32) --> (X & 32) >> 5
701 // TODO: Investigate removing this as its likely unnecessary given the below
702 // `isKnownToBeAPowerOfTwo` check.
703 if ((~Known.Zero).isPowerOf2())
704 return BinaryOperator::CreateLShr(
705 Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
706
707 // More generally we can also handle non-constant power of 2 patterns such as
708 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
709 // ctpop(Pow2OrZero) --> icmp ne X, 0
710 if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
711 return CastInst::Create(Instruction::ZExt,
712 IC.Builder.CreateICmpNE(Op0,
713 Constant::getNullValue(Ty)),
714 Ty);
715
716 // Add range attribute since known bits can't completely reflect what we know.
717 if (BitWidth != 1) {
718 ConstantRange OldRange =
719 II.getRange().value_or(ConstantRange::getFull(BitWidth));
720
721 unsigned Lower = Known.countMinPopulation();
722 unsigned Upper = Known.countMaxPopulation() + 1;
723
724 if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
725 isKnownNonZero(Op0, IC.getSimplifyQuery().getWithInstruction(&II)))
726 Lower = 1;
727
728 ConstantRange Range(APInt(BitWidth, Lower), APInt(BitWidth, Upper));
729 Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
730
731 if (Range != OldRange) {
732 II.addRangeRetAttr(Range);
733 return &II;
734 }
735 }
736
737 return nullptr;
738}
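// Illustrative sketches of two ctpop folds handled above (names assumed):
//   %m = and i32 %x, 32
//   ctpop(i32 %m)                  -->  lshr i32 %m, 5
//   %neg = sub i32 0, %x
//   %or  = or i32 %x, %neg         ; single use
//   ctpop(i32 %or)                 -->  sub i32 32, cttz(i32 %x, i1 false)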
739
740/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
741/// at most two source operands are actually referenced.
742static Instruction *simplifyNeonTbl(IntrinsicInst &II, InstCombinerImpl &IC,
743 bool IsExtension) {
744 // Bail out if the mask is not a constant.
745 auto *C = dyn_cast<Constant>(II.getArgOperand(II.arg_size() - 1));
746 if (!C)
747 return nullptr;
748
749 auto *RetTy = cast<FixedVectorType>(II.getType());
750 unsigned NumIndexes = RetTy->getNumElements();
751
752 // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
753 if (!RetTy->getElementType()->isIntegerTy(8) ||
754 (NumIndexes != 8 && NumIndexes != 16))
755 return nullptr;
756
757 // For tbx instructions, the first argument is the "fallback" vector, which
758 // has the same length as the mask and return type.
759 unsigned int StartIndex = (unsigned)IsExtension;
760 auto *SourceTy =
761 cast<FixedVectorType>(II.getArgOperand(StartIndex)->getType());
762 // Note that the element count of each source vector does *not* need to be the
763 // same as the element count of the return type and mask! All source vectors
764 // must have the same element count as each other, though.
765 unsigned NumElementsPerSource = SourceTy->getNumElements();
766
767 // There are no tbl/tbx intrinsics for which the destination size exceeds the
768 // source size. However, our definitions of the intrinsics, at least in
769 // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
770 // *could* technically happen.
771 if (NumIndexes > NumElementsPerSource)
772 return nullptr;
773
774 // The tbl/tbx intrinsics take several source operands followed by a mask
775 // operand.
776 unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;
777
778 // Map input operands to shuffle indices. This also helpfully deduplicates the
779 // input arguments, in case the same value is passed as an argument multiple
780 // times.
781 SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
782 Value *ShuffleOperands[2] = {PoisonValue::get(SourceTy),
783 PoisonValue::get(SourceTy)};
784
785 int Indexes[16];
786 for (unsigned I = 0; I < NumIndexes; ++I) {
787 Constant *COp = C->getAggregateElement(I);
788
789 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
790 return nullptr;
791
792 if (isa<UndefValue>(COp)) {
793 Indexes[I] = -1;
794 continue;
795 }
796
797 uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
798 // The index of the input argument that this index references (0 = first
799 // source argument, etc).
800 unsigned SourceOperandIndex = Index / NumElementsPerSource;
801 // The index of the element at that source operand.
802 unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
803
804 Value *SourceOperand;
805 if (SourceOperandIndex >= NumSourceOperands) {
806 // This index is out of bounds. Map it to index into either the fallback
807 // vector (tbx) or vector of zeroes (tbl).
808 SourceOperandIndex = NumSourceOperands;
809 if (IsExtension) {
810 // For out-of-bounds indices in tbx, choose the `I`th element of the
811 // fallback.
812 SourceOperand = II.getArgOperand(0);
813 SourceOperandElementIndex = I;
814 } else {
815 // Otherwise, choose some element from the dummy vector of zeroes (we'll
816 // always choose the first).
817 SourceOperand = Constant::getNullValue(SourceTy);
818 SourceOperandElementIndex = 0;
819 }
820 } else {
821 SourceOperand = II.getArgOperand(SourceOperandIndex + StartIndex);
822 }
823
824 // The source operand may be the fallback vector, which may not have the
825 // same number of elements as the source vector. In that case, we *could*
826 // choose to extend its length with another shufflevector, but it's simpler
827 // to just bail instead.
828 if (cast<FixedVectorType>(SourceOperand->getType())->getNumElements() !=
829 NumElementsPerSource)
830 return nullptr;
831
832 // We now know the source operand referenced by this index. Make it a
833 // shufflevector operand, if it isn't already.
834 unsigned NumSlots = ValueToShuffleSlot.size();
835 // This shuffle references more than two sources, and hence cannot be
836 // represented as a shufflevector.
837 if (NumSlots == 2 && !ValueToShuffleSlot.contains(SourceOperand))
838 return nullptr;
839
840 auto [It, Inserted] =
841 ValueToShuffleSlot.try_emplace(SourceOperand, NumSlots);
842 if (Inserted)
843 ShuffleOperands[It->getSecond()] = SourceOperand;
844
845 unsigned RemappedIndex =
846 (It->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
847 Indexes[I] = RemappedIndex;
848 }
849
850 Value *Shuf = IC.Builder.CreateShuffleVector(
851 ShuffleOperands[0], ShuffleOperands[1], ArrayRef(Indexes, NumIndexes));
852 return IC.replaceInstUsesWith(II, Shuf);
853}
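// Illustrative sketch of the constant-mask fold above (an assumed AArch64
// tbl1 whose mask indices are all in range):
//   %r = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %t,
//            <8 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>)
// becomes
//   %r = shufflevector <16 x i8> %t, <16 x i8> poison,
//            <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>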
854
855// Returns true iff the 2 intrinsics have the same operands, limiting the
856// comparison to the first NumOperands.
857static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
858 unsigned NumOperands) {
859 assert(I.arg_size() >= NumOperands && "Not enough operands");
860 assert(E.arg_size() >= NumOperands && "Not enough operands");
861 for (unsigned i = 0; i < NumOperands; i++)
862 if (I.getArgOperand(i) != E.getArgOperand(i))
863 return false;
864 return true;
865}
866
867// Remove trivially empty start/end intrinsic ranges, i.e. a start
868// immediately followed by an end (ignoring debuginfo or other
869// start/end intrinsics in between). As this handles only the most trivial
870// cases, tracking the nesting level is not needed:
871//
872// call @llvm.foo.start(i1 0)
873// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
874// call @llvm.foo.end(i1 0)
875// call @llvm.foo.end(i1 0) ; &I
876static bool
878 std::function<bool(const IntrinsicInst &)> IsStart) {
879 // We start from the end intrinsic and scan backwards, so that InstCombine
880 // has already processed (and potentially removed) all the instructions
881 // before the end intrinsic.
882 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
883 for (; BI != BE; ++BI) {
884 if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
885 if (I->isDebugOrPseudoInst() ||
886 I->getIntrinsicID() == EndI.getIntrinsicID())
887 continue;
888 if (IsStart(*I)) {
889 if (haveSameOperands(EndI, *I, EndI.arg_size())) {
890 IC.eraseInstFromFunction(*I);
891 IC.eraseInstFromFunction(EndI);
892 return true;
893 }
894 // Skip start intrinsics that don't pair with this end intrinsic.
895 continue;
896 }
897 }
898 break;
899 }
900
901 return false;
902}
903
904Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
905 removeTriviallyEmptyRange(I, *this, [&I](const IntrinsicInst &II) {
906 // Bail out on the case where the source va_list of a va_copy is destroyed
907 // immediately by a follow-up va_end.
908 return II.getIntrinsicID() == Intrinsic::vastart ||
909 (II.getIntrinsicID() == Intrinsic::vacopy &&
910 I.getArgOperand(0) != II.getArgOperand(1));
911 });
912 return nullptr;
913}
914
915static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
916 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
917 Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
918 if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
919 Call.setArgOperand(0, Arg1);
920 Call.setArgOperand(1, Arg0);
921 return &Call;
922 }
923 return nullptr;
924}
925
926/// Creates a result tuple for an overflow intrinsic \p II with a given
927/// \p Result and a constant \p Overflow value.
928static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
929 Constant *Overflow) {
930 Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
931 StructType *ST = cast<StructType>(II->getType());
932 Constant *Struct = ConstantStruct::get(ST, V);
933 return InsertValueInst::Create(Struct, Result, 0);
934}
935
936Instruction *
937InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
938 WithOverflowInst *WO = cast<WithOverflowInst>(II);
939 Value *OperationResult = nullptr;
940 Constant *OverflowResult = nullptr;
941 if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
942 WO->getRHS(), *WO, OperationResult, OverflowResult))
943 return createOverflowTuple(WO, OperationResult, OverflowResult);
944
945 // See whether we can optimize the overflow check with assumption information.
946 for (User *U : WO->users()) {
947 if (!match(U, m_ExtractValue<1>(m_Value())))
948 continue;
949
950 for (auto &AssumeVH : AC.assumptionsFor(U)) {
951 if (!AssumeVH)
952 continue;
953 CallInst *I = cast<CallInst>(AssumeVH);
954 if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
955 continue;
956 if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
957 /*AllowEphemerals=*/true))
958 continue;
959 Value *Result =
960 Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
961 Result->takeName(WO);
962 if (auto *Inst = dyn_cast<Instruction>(Result)) {
963 if (WO->isSigned())
964 Inst->setHasNoSignedWrap();
965 else
966 Inst->setHasNoUnsignedWrap();
967 }
968 return createOverflowTuple(WO, Result,
969 ConstantInt::getFalse(U->getType()));
970 }
971 }
972
973 return nullptr;
974}
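// Illustrative sketch of the assume-based fold above (names assumed): when
// the inverted overflow bit is assumed to be true, the arithmetic is rebuilt
// with a nowrap flag and a constant-false overflow field:
//   %wo  = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %ov  = extractvalue { i32, i1 } %wo, 1
//   %not = xor i1 %ov, true
//   call void @llvm.assume(i1 %not)
// lets the result field become 'add nuw i32 %a, %b' and the overflow field
// become 'false'.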
975
976static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
977 Ty = Ty->getScalarType();
978 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
979}
980
981static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
982 Ty = Ty->getScalarType();
983 return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
984}
985
986/// \returns the compare predicate type if the test performed by
987/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
988/// floating-point environment assumed for \p F for type \p Ty
989static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask,
990 const Function &F, Type *Ty) {
991 switch (static_cast<unsigned>(Mask)) {
992 case fcZero:
993 if (inputDenormalIsIEEE(F, Ty))
994 return FCmpInst::FCMP_OEQ;
995 break;
996 case fcZero | fcSubnormal:
997 if (inputDenormalIsDAZ(F, Ty))
998 return FCmpInst::FCMP_OEQ;
999 break;
1000 case fcPositive | fcNegZero:
1001 if (inputDenormalIsIEEE(F, Ty))
1002 return FCmpInst::FCMP_OGE;
1003 break;
1004 case fcPositive | fcNegZero | fcNegSubnormal:
1005 if (inputDenormalIsDAZ(F, Ty))
1006 return FCmpInst::FCMP_OGE;
1007 break;
1008 case fcPosSubnormal | fcPosNormal | fcPosInf:
1009 if (inputDenormalIsIEEE(F, Ty))
1010 return FCmpInst::FCMP_OGT;
1011 break;
1012 case fcNegative | fcPosZero:
1013 if (inputDenormalIsIEEE(F, Ty))
1014 return FCmpInst::FCMP_OLE;
1015 break;
1016 case fcNegative | fcPosZero | fcPosSubnormal:
1017 if (inputDenormalIsDAZ(F, Ty))
1018 return FCmpInst::FCMP_OLE;
1019 break;
1020 case fcNegSubnormal | fcNegNormal | fcNegInf:
1021 if (inputDenormalIsIEEE(F, Ty))
1022 return FCmpInst::FCMP_OLT;
1023 break;
1024 case fcPosNormal | fcPosInf:
1025 if (inputDenormalIsDAZ(F, Ty))
1026 return FCmpInst::FCMP_OGT;
1027 break;
1028 case fcNegNormal | fcNegInf:
1029 if (inputDenormalIsDAZ(F, Ty))
1030 return FCmpInst::FCMP_OLT;
1031 break;
1032 case ~fcZero & ~fcNan:
1033 if (inputDenormalIsIEEE(F, Ty))
1034 return FCmpInst::FCMP_ONE;
1035 break;
1036 case ~(fcZero | fcSubnormal) & ~fcNan:
1037 if (inputDenormalIsDAZ(F, Ty))
1038 return FCmpInst::FCMP_ONE;
1039 break;
1040 default:
1041 break;
1042 }
1043
1044 return FCmpInst::BAD_FCMP_PREDICATE;
1045}
1046
1047Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
1048 Value *Src0 = II.getArgOperand(0);
1049 Value *Src1 = II.getArgOperand(1);
1050 const ConstantInt *CMask = cast<ConstantInt>(Src1);
1051 FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
1052 const bool IsUnordered = (Mask & fcNan) == fcNan;
1053 const bool IsOrdered = (Mask & fcNan) == fcNone;
1054 const FPClassTest OrderedMask = Mask & ~fcNan;
1055 const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
1056
1057 const bool IsStrict =
1058 II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
1059
1060 Value *FNegSrc;
1061 if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
1062 // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
1063
1064 II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
1065 return replaceOperand(II, 0, FNegSrc);
1066 }
1067
1068 Value *FAbsSrc;
1069 if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
1070 II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
1071 return replaceOperand(II, 0, FAbsSrc);
1072 }
1073
1074 if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
1075 (IsOrdered || IsUnordered) && !IsStrict) {
1076 // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
1077 // is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
1078 // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
1079 // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp une fabs(x), +inf
1080 Constant *Inf = ConstantFP::getInfinity(Src0->getType());
1081 FCmpInst::Predicate Pred =
1082 IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
1083 if (OrderedInvertedMask == fcInf)
1084 Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
1085
1086 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Src0);
1087 Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
1088 CmpInf->takeName(&II);
1089 return replaceInstUsesWith(II, CmpInf);
1090 }
1091
1092 if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
1093 (IsOrdered || IsUnordered) && !IsStrict) {
1094 // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1095 // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1096 // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
1097 // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
1098 Constant *Inf =
1099 ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
1100 Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
1101 : Builder.CreateFCmpOEQ(Src0, Inf);
1102
1103 EqInf->takeName(&II);
1104 return replaceInstUsesWith(II, EqInf);
1105 }
1106
1107 if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
1108 (IsOrdered || IsUnordered) && !IsStrict) {
1109 // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
1110 // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
1111 // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
1112 // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
1113 Constant *Inf = ConstantFP::getInfinity(Src0->getType(),
1114 OrderedInvertedMask == fcNegInf);
1115 Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
1116 : Builder.CreateFCmpONE(Src0, Inf);
1117 NeInf->takeName(&II);
1118 return replaceInstUsesWith(II, NeInf);
1119 }
1120
1121 if (Mask == fcNan && !IsStrict) {
1122 // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1123 // exceptions.
1124 Value *IsNan =
1125 Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
1126 IsNan->takeName(&II);
1127 return replaceInstUsesWith(II, IsNan);
1128 }
1129
1130 if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1131 // Equivalent of !isnan. Replace with standard fcmp.
1132 Value *FCmp =
1133 Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
1134 FCmp->takeName(&II);
1135 return replaceInstUsesWith(II, FCmp);
1136 }
1137
1138 FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE;
1139
1140 // Try to replace with an fcmp with 0
1141 //
1142 // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1143 // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
1144 // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1145 // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1146 //
1147 // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
1148 // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
1149 //
1150 // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
1151 // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
1152 //
1153 if (!IsStrict && (IsOrdered || IsUnordered) &&
1154 (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
1155 Src0->getType())) !=
1156 FCmpInst::BAD_FCMP_PREDICATE) {
1157 Constant *Zero = ConstantFP::getZero(Src0->getType());
1158 // Equivalent of == 0.
1159 Value *FCmp = Builder.CreateFCmp(
1160 IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
1161 Src0, Zero);
1162
1163 FCmp->takeName(&II);
1164 return replaceInstUsesWith(II, FCmp);
1165 }
1166
1167 KnownFPClass Known = computeKnownFPClass(Src0, Mask, &II);
1168
1169 // Clear test bits we know must be false from the source value.
1170 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
1171 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
1172 if ((Mask & Known.KnownFPClasses) != Mask) {
1173 II.setArgOperand(
1174 1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
1175 return &II;
1176 }
1177
1178 // If none of the tests which can return false are possible, fold to true.
1179 // fp_class (nnan x), ~(qnan|snan) -> true
1180 // fp_class (ninf x), ~(ninf|pinf) -> true
1181 if (Mask == Known.KnownFPClasses)
1182 return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
1183
1184 return nullptr;
1185}
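// Illustrative sketch of one is.fpclass fold above (non-strictfp code is
// assumed; 3 is the fcNan mask, i.e. fcSNan|fcQNan):
//   %c = call i1 @llvm.is.fpclass.f32(float %x, i32 3)
// becomes
//   %c = fcmp uno float %x, 0.000000e+00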
1186
1187static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1188 KnownBits Known = computeKnownBits(Op, SQ);
1189 if (Known.isNonNegative())
1190 return false;
1191 if (Known.isNegative())
1192 return true;
1193
1194 Value *X, *Y;
1195 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1197
1198 return std::nullopt;
1199}
1200
1201static std::optional<bool> getKnownSignOrZero(Value *Op,
1202 const SimplifyQuery &SQ) {
1203 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1204 return Sign;
1205
1206 Value *X, *Y;
1207 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1209
1210 return std::nullopt;
1211}
1212
1213/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1214static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1215 const SimplifyQuery &SQ) {
1216 std::optional<bool> Known1 = getKnownSign(Op1, SQ);
1217 if (!Known1)
1218 return false;
1219 std::optional<bool> Known0 = getKnownSign(Op0, SQ);
1220 if (!Known0)
1221 return false;
1222 return *Known0 == *Known1;
1223}
1224
1225/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1226/// can trigger other combines.
1227static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
1228 InstCombiner::BuilderTy &Builder) {
1229 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1230 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
1231 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
1232 "Expected a min or max intrinsic");
1233
1234 // TODO: Match vectors with undef elements, but undef may not propagate.
1235 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1236 Value *X;
1237 const APInt *C0, *C1;
1238 if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
1239 !match(Op1, m_APInt(C1)))
1240 return nullptr;
1241
1242 // Check for necessary no-wrap and overflow constraints.
1243 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
1244 auto *Add = cast<BinaryOperator>(Op0);
1245 if ((IsSigned && !Add->hasNoSignedWrap()) ||
1246 (!IsSigned && !Add->hasNoUnsignedWrap()))
1247 return nullptr;
1248
1249 // If the constant difference overflows, then instsimplify should reduce the
1250 // min/max to the add or C1.
1251 bool Overflow;
1252 APInt CDiff =
1253 IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
1254 assert(!Overflow && "Expected simplify of min/max");
1255
1256 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1257 // Note: the "mismatched" no-overflow setting does not propagate.
1258 Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
1259 Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
1260 return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
1261 : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
1262}
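// Illustrative sketch of the fold above (constants are assumed to satisfy
// the no-wrap and no-overflow requirements):
//   %a = add nsw i8 %x, 16
//   %m = call i8 @llvm.smax.i8(i8 %a, i8 32)
// becomes
//   %n = call i8 @llvm.smax.i8(i8 %x, i8 16)   ; 32 - 16
//   %m = add nsw i8 %n, 16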
1263/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1264Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1265 Type *Ty = MinMax1.getType();
1266
1267 // We are looking for a tree of:
1268 // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1269 // Where the min and max could be reversed
1270 Instruction *MinMax2;
1271 BinaryOperator *AddSub;
1272 const APInt *MinValue, *MaxValue;
1273 if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
1274 if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
1275 return nullptr;
1276 } else if (match(&MinMax1,
1277 m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
1278 if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
1279 return nullptr;
1280 } else
1281 return nullptr;
1282
1283 // Check that the constants clamp a saturate, and that the new type would be
1284 // sensible to convert to.
1285 if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
1286 return nullptr;
1287 // In what bitwidth can this be treated as saturating arithmetic?
1288 unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
1289 // FIXME: This isn't quite right for vectors, but using the scalar type is a
1290 // good first approximation for what should be done there.
1291 if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
1292 return nullptr;
1293
1294 // Also make sure that the inner min/max and the add/sub have one use.
1295 if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
1296 return nullptr;
1297
1298 // Create the new type (which can be a vector type)
1299 Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1300
1301 Intrinsic::ID IntrinsicID;
1302 if (AddSub->getOpcode() == Instruction::Add)
1303 IntrinsicID = Intrinsic::sadd_sat;
1304 else if (AddSub->getOpcode() == Instruction::Sub)
1305 IntrinsicID = Intrinsic::ssub_sat;
1306 else
1307 return nullptr;
1308
1309 // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1310 // is usually achieved via a sext from a smaller type.
1311 if (ComputeMaxSignificantBits(AddSub->getOperand(0), AddSub) > NewBitWidth ||
1312 ComputeMaxSignificantBits(AddSub->getOperand(1), AddSub) > NewBitWidth)
1313 return nullptr;
1314
1315 // Finally create and return the sat intrinsic, truncated to the new type
1316 Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
1317 Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
1318 Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
1319 return CastInst::Create(Instruction::SExt, Sat, Ty);
1320}
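// Illustrative sketch of the clamp pattern matched above (an assumed i8
// saturating add widened through i32; the inner min/max and the add are
// single-use):
//   %ax = sext i8 %a to i32
//   %bx = sext i8 %b to i32
//   %s  = add i32 %ax, %bx
//   %lo = call i32 @llvm.smax.i32(i32 %s, i32 -128)
//   %r  = call i32 @llvm.smin.i32(i32 %lo, i32 127)
// becomes
//   %sat = call i8 @llvm.sadd.sat.i8(i8 %a, i8 %b)
//   %r   = sext i8 %sat to i32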
1321
1322
1323/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1324/// can only be one of two possible constant values -- turn that into a select
1325/// of constants.
1326static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
1327 InstCombiner::BuilderTy &Builder) {
1328 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1329 Value *X;
1330 const APInt *C0, *C1;
1331 if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
1332 return nullptr;
1333
1334 ICmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
1335 switch (II->getIntrinsicID()) {
1336 case Intrinsic::smax:
1337 if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1338 Pred = ICmpInst::ICMP_SGT;
1339 break;
1340 case Intrinsic::smin:
1341 if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1342 Pred = ICmpInst::ICMP_SLT;
1343 break;
1344 case Intrinsic::umax:
1345 if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1346 Pred = ICmpInst::ICMP_UGT;
1347 break;
1348 case Intrinsic::umin:
1349 if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1350 Pred = ICmpInst::ICMP_ULT;
1351 break;
1352 default:
1353 llvm_unreachable("Expected min/max intrinsic");
1354 }
1355 if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1356 return nullptr;
1357
1358 // max (min X, 42), 41 --> X > 41 ? 42 : 41
1359 // min (max X, 42), 43 --> X < 43 ? 42 : 43
1360 Value *Cmp = Builder.CreateICmp(Pred, X, I1);
1361 return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
1362}
1363
1364/// If this min/max has a constant operand and an operand that is a matching
1365/// min/max with a constant operand, constant-fold the 2 constant operands.
1366static Value *reassociateMinMaxWithConstants(IntrinsicInst *II,
1367 IRBuilderBase &Builder,
1368 const SimplifyQuery &SQ) {
1369 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1370 auto *LHS = dyn_cast<MinMaxIntrinsic>(II->getArgOperand(0));
1371 if (!LHS)
1372 return nullptr;
1373
1374 Constant *C0, *C1;
1375 if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
1376 !match(II->getArgOperand(1), m_ImmConstant(C1)))
1377 return nullptr;
1378
1379 // max (max X, C0), C1 --> max X, (max C0, C1)
1380 // min (min X, C0), C1 --> min X, (min C0, C1)
1381 // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1382 // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
1383 Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
1384 if (InnerMinMaxID != MinMaxID &&
1385 !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
1386 (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1387 isKnownNonNegative(C0, SQ) && isKnownNonNegative(C1, SQ)))
1388 return nullptr;
1389
1390 ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
1391 Value *CondC = Builder.CreateICmp(Pred, C0, C1);
1392 Value *NewC = Builder.CreateSelect(CondC, C0, C1);
1393 return Builder.CreateIntrinsic(InnerMinMaxID, II->getType(),
1394 {LHS->getArgOperand(0), NewC});
1395}
1396
1397/// If this min/max has a matching min/max operand with a constant, try to push
1398/// the constant operand into this instruction. This can enable more folds.
1399static Instruction *
1400reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
1401 InstCombiner::BuilderTy &Builder) {
1402 // Match and capture a min/max operand candidate.
1403 Value *X, *Y;
1404 Constant *C;
1405 Instruction *Inner;
1406 if (!match(II, m_c_MaxOrMin(m_OneUse(m_CombineAnd(
1407 m_Instruction(Inner),
1408 m_MaxOrMin(m_Value(X), m_ImmConstant(C)))),
1409 m_Value(Y))))
1410 return nullptr;
1411
1412 // The inner op must match. Check for constants to avoid infinite loops.
1413 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1414 auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
1415 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
1416 match(X, m_ImmConstant()) || match(Y, m_ImmConstant()))
1417 return nullptr;
1418
1419 // max (max X, C), Y --> max (max X, Y), C
1420 Function *MinMax = Intrinsic::getOrInsertDeclaration(II->getModule(),
1421 MinMaxID, II->getType());
1422 Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
1423 NewInner->takeName(Inner);
1424 return CallInst::Create(MinMax, {NewInner, C});
1425}
1426
1427/// Reduce a sequence of min/max intrinsics with a common operand.
1428static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
1429 // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1430 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1431 auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
1432 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1433 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1434 RHS->getIntrinsicID() != MinMaxID ||
1435 (!LHS->hasOneUse() && !RHS->hasOneUse()))
1436 return nullptr;
1437
1438 Value *A = LHS->getArgOperand(0);
1439 Value *B = LHS->getArgOperand(1);
1440 Value *C = RHS->getArgOperand(0);
1441 Value *D = RHS->getArgOperand(1);
1442
1443 // Look for a common operand.
1444 Value *MinMaxOp = nullptr;
1445 Value *ThirdOp = nullptr;
1446 if (LHS->hasOneUse()) {
1447 // If the LHS is only used in this chain and the RHS is used outside of it,
1448 // reuse the RHS min/max because that will eliminate the LHS.
1449 if (D == A || C == A) {
1450 // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1451 // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1452 MinMaxOp = RHS;
1453 ThirdOp = B;
1454 } else if (D == B || C == B) {
1455 // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1456 // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1457 MinMaxOp = RHS;
1458 ThirdOp = A;
1459 }
1460 } else {
1461 assert(RHS->hasOneUse() && "Expected one-use operand");
1462 // Reuse the LHS. This will eliminate the RHS.
1463 if (D == A || D == B) {
1464 // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1465 // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1466 MinMaxOp = LHS;
1467 ThirdOp = C;
1468 } else if (C == A || C == B) {
1469 // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1470 // min(min(a, b), min(c, b)) --> min(min(a, b), d)
1471 MinMaxOp = LHS;
1472 ThirdOp = D;
1473 }
1474 }
1475
1476 if (!MinMaxOp || !ThirdOp)
1477 return nullptr;
1478
1479 Module *Mod = II->getModule();
1480 Function *MinMax =
1481 Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
1482 return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1483}
1484
1485/// If all arguments of the intrinsic are unary shuffles with the same mask,
1486/// try to shuffle after the intrinsic.
1487Instruction *
1488InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
1489 if (!II->getType()->isVectorTy() ||
1490 !isTriviallyVectorizable(II->getIntrinsicID()) ||
1491 !II->getCalledFunction()->isSpeculatable())
1492 return nullptr;
1493
1494 Value *X;
1495 Constant *C;
1496 ArrayRef<int> Mask;
1497 auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1498 return isa<Constant>(Arg.get()) ||
1499 isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1500 Arg.getOperandNo(), nullptr);
1501 });
1502 if (!NonConstArg ||
1503 !match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
1504 return nullptr;
1505
1506 // At least 1 operand must be a shuffle with 1 use because we are creating 2
1507 // instructions.
1508 if (none_of(II->args(), match_fn(m_OneUse(m_Shuffle(m_Value(), m_Value())))))
1509 return nullptr;
1510
1511 // See if all arguments are shuffled with the same mask.
1512 SmallVector<Value *> NewArgs;
1513 Type *SrcTy = X->getType();
1514 for (Use &Arg : II->args()) {
1515 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1516 Arg.getOperandNo(), nullptr))
1517 NewArgs.push_back(Arg);
1518 else if (match(&Arg,
1519 m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1520 X->getType() == SrcTy)
1521 NewArgs.push_back(X);
1522 else if (match(&Arg, m_ImmConstant(C))) {
1523 // If it's a constant, try find the constant that would be shuffled to C.
1524 if (Constant *ShuffledC =
1525 unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
1526 NewArgs.push_back(ShuffledC);
1527 else
1528 return nullptr;
1529 } else
1530 return nullptr;
1531 }
1532
1533 // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
1534 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1535 // Result type might be a different vector width.
1536 // TODO: Check that the result type isn't widened?
1537 VectorType *ResTy =
1538 VectorType::get(II->getType()->getScalarType(), cast<VectorType>(SrcTy));
1539 Value *NewIntrinsic =
1540 Builder.CreateIntrinsic(ResTy, II->getIntrinsicID(), NewArgs, FPI);
1541 return new ShuffleVectorInst(NewIntrinsic, Mask);
1542}
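// Illustrative sketch of the fold above (maxnum is assumed to pass the
// trivially-vectorizable and speculatable checks; both operands share the
// same mask and at least one shuffle is single-use):
//   %xs = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %ys = shufflevector <4 x float> %y, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %r  = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %xs, <4 x float> %ys)
// becomes
//   %m = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y)
//   %r = shufflevector <4 x float> %m, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>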
1543
1544/// If all arguments of the intrinsic are reverses, try to pull the reverse
1545/// after the intrinsic.
1546Value *InstCombinerImpl::foldReversedIntrinsicOperands(IntrinsicInst *II) {
1547 if (!isTriviallyVectorizable(II->getIntrinsicID()))
1548 return nullptr;
1549
1550 // At least 1 operand must be a reverse with 1 use because we are creating 2
1551 // instructions.
1552 if (none_of(II->args(), [](Value *V) {
1553 return match(V, m_OneUse(m_VecReverse(m_Value())));
1554 }))
1555 return nullptr;
1556
1557 Value *X;
1558 Constant *C;
1559 SmallVector<Value *> NewArgs;
1560 for (Use &Arg : II->args()) {
1561 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1562 Arg.getOperandNo(), nullptr))
1563 NewArgs.push_back(Arg);
1564 else if (match(&Arg, m_VecReverse(m_Value(X))))
1565 NewArgs.push_back(X);
1566 else if (isSplatValue(Arg))
1567 NewArgs.push_back(Arg);
1568 else if (match(&Arg, m_ImmConstant(C)))
1569 NewArgs.push_back(Builder.CreateVectorReverse(C));
1570 else
1571 return nullptr;
1572 }
1573
1574 // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
1575 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1576 Instruction *NewIntrinsic = Builder.CreateIntrinsic(
1577 II->getType(), II->getIntrinsicID(), NewArgs, FPI);
1578 return Builder.CreateVectorReverse(NewIntrinsic);
1579}
1580
1581/// Fold the following cases; accepts both bswap and bitreverse intrinsics:
1582/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1583/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
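/// For illustration (an i16 instance of the first form): bswap(bswap(x) ^
/// 0x1234) == x ^ 0x3412, since bswap is a pure byte permutation and so
/// distributes over bitwise logic operations.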
1584template <Intrinsic::ID IntrID>
1586 InstCombiner::BuilderTy &Builder) {
1587 static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
1588 "This helper only supports BSWAP and BITREVERSE intrinsics");
1589
1590 Value *X, *Y;
1591 // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1592 // don't match ConstantExpr that aren't meaningful for this transform.
1595 Value *OldReorderX, *OldReorderY;
1597
1598 // If both X and Y are bswap/bitreverse, the transform reduces the number
1599 // of instructions even if there are multiple uses.
1600 // If only one operand is bswap/bitreverse, we need to ensure that operand
1601 // has only one use.
1602 if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
1603 match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
1604 return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
1605 }
1606
1607 if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
1608 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
1609 return BinaryOperator::Create(Op, OldReorderX, NewReorder);
1610 }
1611
1612 if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
1613 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
1614 return BinaryOperator::Create(Op, NewReorder, OldReorderY);
1615 }
1616 }
1617 return nullptr;
1618}
1619
1620/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1621/// `f(f(x, y), y) == f(x, y)` holds.
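/// e.g. umin(umin(x, y), y) == umin(x, y); the same holds for each of the
/// min/max variants listed below.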
1623 switch (IID) {
1624 case Intrinsic::smax:
1625 case Intrinsic::smin:
1626 case Intrinsic::umax:
1627 case Intrinsic::umin:
1628 case Intrinsic::maximum:
1629 case Intrinsic::minimum:
1630 case Intrinsic::maximumnum:
1631 case Intrinsic::minimumnum:
1632 case Intrinsic::maxnum:
1633 case Intrinsic::minnum:
1634 return true;
1635 default:
1636 return false;
1637 }
1638}
1639
1640/// Attempt to simplify value-accumulating recurrences of the kind:
1641/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1642/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1643/// and let the idempotent binary intrinsic be hoisted when the operands are
1644/// known to be loop-invariant.
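/// In the example above, once %b is loop-invariant every iteration produces
/// umax(%a, %b), so the recurrence can be replaced by that single call.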
1646 IntrinsicInst *II) {
1647 PHINode *PN;
1648 Value *Init, *OtherOp;
1649
1650 // A binary intrinsic recurrence with loop-invariant operands is equivalent to
1651 // `call @llvm.binary.intrinsic(Init, OtherOp)`.
1652 auto IID = II->getIntrinsicID();
1653 if (!isIdempotentBinaryIntrinsic(IID) ||
1655 !IC.getDominatorTree().dominates(OtherOp, PN))
1656 return nullptr;
1657
1658 auto *InvariantBinaryInst =
1659 IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
1660 if (isa<FPMathOperator>(InvariantBinaryInst))
1661 cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
1662 return InvariantBinaryInst;
1663}
1664
1665static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1666 if (!CanReorderLanes)
1667 return nullptr;
1668
1669 Value *V;
1670 if (match(Arg, m_VecReverse(m_Value(V))))
1671 return V;
1672
1673 ArrayRef<int> Mask;
1674 if (!isa<FixedVectorType>(Arg->getType()) ||
1675 !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
1676 !cast<ShuffleVectorInst>(Arg)->isSingleSource())
1677 return nullptr;
1678
1679 int Sz = Mask.size();
1680 SmallBitVector UsedIndices(Sz);
1681 for (int Idx : Mask) {
1682 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1683 return nullptr;
1684 UsedIndices.set(Idx);
1685 }
1686
1687 // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
1688 // other changes.
1689 return UsedIndices.all() ? V : nullptr;
1690}
1691
1692/// Fold an unsigned minimum of trailing or leading zero-bit counts:
1693/// umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
1694/// umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
1695/// >> ConstOp))
1696/// umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 | CtOp2)
1697/// umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 | CtOp2)
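/// e.g. for i8: umin(cttz(x, true), 3) --> cttz(x | 8, true), since setting
/// bit 3 caps the trailing-zero count at 3 without affecting smaller counts.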
1698template <Intrinsic::ID IntrID>
1699static Value *
1701 const DataLayout &DL,
1702 InstCombiner::BuilderTy &Builder) {
1703 static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1704 "This helper only supports cttz and ctlz intrinsics");
1705
1706 Value *CtOp1, *CtOp2;
1707 Value *ZeroUndef1, *ZeroUndef2;
1708 if (!match(I0, m_OneUse(
1709 m_Intrinsic<IntrID>(m_Value(CtOp1), m_Value(ZeroUndef1)))))
1710 return nullptr;
1711
1712 if (match(I1,
1713 m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp2), m_Value(ZeroUndef2)))))
1714 return Builder.CreateBinaryIntrinsic(
1715 IntrID, Builder.CreateOr(CtOp1, CtOp2),
1716 Builder.CreateOr(ZeroUndef1, ZeroUndef2));
1717
1718 unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1719 auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1720 if (!match(I1, m_CheckedInt(LessBitWidth)))
1721 // We have a constant >= BitWidth (which can be handled by CVP)
1722 // or a non-splat vector with elements < and >= BitWidth
1723 return nullptr;
1724
1725 Type *Ty = I1->getType();
1727 IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1728 IntrID == Intrinsic::cttz
1729 ? ConstantInt::get(Ty, 1)
1730 : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1731 cast<Constant>(I1), DL);
1732 return Builder.CreateBinaryIntrinsic(
1733 IntrID, Builder.CreateOr(CtOp1, NewConst),
1734 ConstantInt::getTrue(ZeroUndef1->getType()));
1735}
1736
1737/// Return whether "X LOp (Y ROp Z)" is always equal to
1738/// "(X LOp Y) ROp (X LOp Z)".
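/// e.g. with nuw adds: x + umax(y, z) == umax(x + y, x + z), because adding a
/// common offset that cannot wrap preserves the unsigned ordering.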
1740 bool HasNSW, Intrinsic::ID ROp) {
1741 switch (ROp) {
1742 case Intrinsic::umax:
1743 case Intrinsic::umin:
1744 if (HasNUW && LOp == Instruction::Add)
1745 return true;
1746 if (HasNUW && LOp == Instruction::Shl)
1747 return true;
1748 return false;
1749 case Intrinsic::smax:
1750 case Intrinsic::smin:
1751 return HasNSW && LOp == Instruction::Add;
1752 default:
1753 return false;
1754 }
1755}
1756
1757// Attempts to factorise a common term
1758// in an instruction that has the form "(A op' B) op (C op' D)",
1759// where op is an intrinsic and op' is a binop.
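// e.g. umin(a + b, a + c) --> a + umin(b, c) when both adds are nuw (see
// leftDistributesOverRight for the legal op/op' combinations).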
1760static Value *
1762 InstCombiner::BuilderTy &Builder) {
1763 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1764 Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1765
1768
1769 if (!Op0 || !Op1)
1770 return nullptr;
1771
1772 if (Op0->getOpcode() != Op1->getOpcode())
1773 return nullptr;
1774
1775 if (!Op0->hasOneUse() || !Op1->hasOneUse())
1776 return nullptr;
1777
1778 Instruction::BinaryOps InnerOpcode =
1779 static_cast<Instruction::BinaryOps>(Op0->getOpcode());
1780 bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1781 bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1782
1783 if (!leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode))
1784 return nullptr;
1785
1786 Value *A = Op0->getOperand(0);
1787 Value *B = Op0->getOperand(1);
1788 Value *C = Op1->getOperand(0);
1789 Value *D = Op1->getOperand(1);
1790
1791 // Attempts to swap variables such that A equals C or B equals D,
1792 // if the inner operation is commutative.
1793 if (Op0->isCommutative() && A != C && B != D) {
1794 if (A == D || B == C)
1795 std::swap(C, D);
1796 else
1797 return nullptr;
1798 }
1799
1800 BinaryOperator *NewBinop;
1801 if (A == C) {
1802 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
1803 NewBinop =
1804 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, A, NewIntrinsic));
1805 } else if (B == D) {
1806 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
1807 NewBinop =
1808 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
1809 } else {
1810 return nullptr;
1811 }
1812
1813 NewBinop->setHasNoUnsignedWrap(HasNUW);
1814 NewBinop->setHasNoSignedWrap(HasNSW);
1815
1816 return NewBinop;
1817}
1818
1820 Value *Arg0 = II->getArgOperand(0);
1821 auto *ShiftConst = dyn_cast<Constant>(II->getArgOperand(1));
1822 if (!ShiftConst)
1823 return nullptr;
1824
1825 int ElemBits = Arg0->getType()->getScalarSizeInBits();
1826 bool AllPositive = true;
1827 bool AllNegative = true;
1828
1829 auto Check = [&](Constant *C) -> bool {
1830 if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
1831 const APInt &V = CI->getValue();
1832 if (V.isNonNegative()) {
1833 AllNegative = false;
1834 return AllPositive && V.ult(ElemBits);
1835 }
1836 AllPositive = false;
1837 return AllNegative && V.sgt(-ElemBits);
1838 }
1839 return false;
1840 };
1841
1842 if (auto *VTy = dyn_cast<FixedVectorType>(Arg0->getType())) {
1843 for (unsigned I = 0, E = VTy->getNumElements(); I < E; ++I) {
1844 if (!Check(ShiftConst->getAggregateElement(I)))
1845 return nullptr;
1846 }
1847
1848 } else if (!Check(ShiftConst))
1849 return nullptr;
1850
1851 IRBuilderBase &B = IC.Builder;
1852 if (AllPositive)
1853 return IC.replaceInstUsesWith(*II, B.CreateShl(Arg0, ShiftConst));
1854
1855 Value *NegAmt = B.CreateNeg(ShiftConst);
1856 Intrinsic::ID IID = II->getIntrinsicID();
1857 const bool IsSigned =
1858 IID == Intrinsic::arm_neon_vshifts || IID == Intrinsic::aarch64_neon_sshl;
1859 Value *Result =
1860 IsSigned ? B.CreateAShr(Arg0, NegAmt) : B.CreateLShr(Arg0, NegAmt);
1861 return IC.replaceInstUsesWith(*II, Result);
1862}
1863
1864/// CallInst simplification. This mostly only handles folding of intrinsic
1865/// instructions. For normal calls, it allows visitCallBase to do the heavy
1866/// lifting.
1868 // Don't try to simplify calls without uses. It will not do anything useful,
1869 // but will result in the following folds being skipped.
1870 if (!CI.use_empty()) {
1871 SmallVector<Value *, 8> Args(CI.args());
1872 if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
1873 SQ.getWithInstruction(&CI)))
1874 return replaceInstUsesWith(CI, V);
1875 }
1876
1877 if (Value *FreedOp = getFreedOperand(&CI, &TLI))
1878 return visitFree(CI, FreedOp);
1879
1880 // If the caller function (i.e. us, the function that contains this CallInst)
1881 // is nounwind, mark the call as nounwind, even if the callee isn't.
1882 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1883 CI.setDoesNotThrow();
1884 return &CI;
1885 }
1886
1888 if (!II)
1889 return visitCallBase(CI);
1890
1891 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1892 // instead of in visitCallBase.
1893 if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1894 if (auto NumBytes = MI->getLengthInBytes()) {
1895 // memmove/cpy/set of zero bytes is a noop.
1896 if (NumBytes->isZero())
1897 return eraseInstFromFunction(CI);
1898
1899 // For atomic unordered mem intrinsics, if the length is not positive or
1900 // is not a multiple of the element size, then the behavior is undefined.
1901 if (MI->isAtomic() &&
1902 (NumBytes->isNegative() ||
1903 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1905 assert(MI->getType()->isVoidTy() &&
1906 "non void atomic unordered mem intrinsic");
1907 return eraseInstFromFunction(*MI);
1908 }
1909 }
1910
1911 // No other transformations apply to volatile transfers.
1912 if (MI->isVolatile())
1913 return nullptr;
1914
1916 // memmove(x,x,size) -> noop.
1917 if (MTI->getSource() == MTI->getDest())
1918 return eraseInstFromFunction(CI);
1919 }
1920
1921 auto IsPointerUndefined = [MI](Value *Ptr) {
1922 return isa<ConstantPointerNull>(Ptr) &&
1924 MI->getFunction(),
1925 cast<PointerType>(Ptr->getType())->getAddressSpace());
1926 };
1927 bool SrcIsUndefined = false;
1928 // If we can determine a pointer alignment that is bigger than currently
1929 // set, update the alignment.
1930 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1932 return I;
1933 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1934 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1935 if (Instruction *I = SimplifyAnyMemSet(MSI))
1936 return I;
1937 }
1938
1939 // If src/dest is null, this memory intrinsic must be a noop.
1940 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1941 Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
1942 return eraseInstFromFunction(CI);
1943 }
1944
1945 // If we have a memmove and the source operand is a constant global,
1946 // then the source and dest pointers can't alias, so we can change this
1947 // into a call to memcpy.
1948 if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1949 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1950 if (GVSrc->isConstant()) {
1951 Module *M = CI.getModule();
1952 Intrinsic::ID MemCpyID =
1953 MMI->isAtomic()
1954 ? Intrinsic::memcpy_element_unordered_atomic
1955 : Intrinsic::memcpy;
1956 Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1957 CI.getArgOperand(1)->getType(),
1958 CI.getArgOperand(2)->getType() };
1960 Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
1961 return II;
1962 }
1963 }
1964 }
1965
1966 // For fixed width vector result intrinsics, use the generic demanded vector
1967 // support.
1968 if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
1969 auto VWidth = IIFVTy->getNumElements();
1970 APInt PoisonElts(VWidth, 0);
1971 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1972 if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
1973 if (V != II)
1974 return replaceInstUsesWith(*II, V);
1975 return II;
1976 }
1977 }
1978
1979 if (II->isCommutative()) {
1980 if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) {
1981 replaceOperand(*II, 0, Pair->first);
1982 replaceOperand(*II, 1, Pair->second);
1983 return II;
1984 }
1985
1986 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
1987 return NewCall;
1988 }
1989
1990 // Unused constrained FP intrinsic calls may have a declared side effect,
1991 // which prevents them from being removed. In some cases, however, the side
1992 // effect is actually absent. To detect this case, call
1993 // simplifyConstrainedFPCall; if it returns a replacement, the call may be removed.
1994 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
1995 if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
1996 return eraseInstFromFunction(CI);
1997 }
1998
1999 Intrinsic::ID IID = II->getIntrinsicID();
2000 switch (IID) {
2001 case Intrinsic::objectsize: {
2002 SmallVector<Instruction *> InsertedInstructions;
2003 if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
2004 &InsertedInstructions)) {
2005 for (Instruction *Inserted : InsertedInstructions)
2006 Worklist.add(Inserted);
2007 return replaceInstUsesWith(CI, V);
2008 }
2009 return nullptr;
2010 }
2011 case Intrinsic::abs: {
2012 Value *IIOperand = II->getArgOperand(0);
2013 bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
2014
2015 // abs(-x) -> abs(x)
2016 Value *X;
2017 if (match(IIOperand, m_Neg(m_Value(X)))) {
2018 if (cast<Instruction>(IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
2019 replaceOperand(*II, 1, Builder.getTrue());
2020 return replaceOperand(*II, 0, X);
2021 }
2022 if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
2023 return replaceOperand(*II, 0, X);
2024
2025 Value *Y;
2026 // abs(a * abs(b)) -> abs(a * b)
2027 if (match(IIOperand,
2030 bool NSW =
2031 cast<Instruction>(IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
2032 auto *XY = NSW ? Builder.CreateNSWMul(X, Y) : Builder.CreateMul(X, Y);
2033 return replaceOperand(*II, 0, XY);
2034 }
2035
2036 if (std::optional<bool> Known =
2037 getKnownSignOrZero(IIOperand, SQ.getWithInstruction(II))) {
2038 // abs(x) -> x if x >= 0 (includes abs(x-y) --> x - y where x >= y)
2039 // abs(x) -> x if x > 0 (includes abs(x-y) --> x - y where x > y)
2040 if (!*Known)
2041 return replaceInstUsesWith(*II, IIOperand);
2042
2043 // abs(x) -> -x if x < 0
2044 // abs(x) -> -x if x <= 0 (includes abs(x-y) --> y - x where x <= y)
2045 if (IntMinIsPoison)
2046 return BinaryOperator::CreateNSWNeg(IIOperand);
2047 return BinaryOperator::CreateNeg(IIOperand);
2048 }
2049
2050 // abs (sext X) --> zext (abs X*)
2051 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
2052 if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
2053 Value *NarrowAbs =
2054 Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
2055 return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
2056 }
2057
2058 // Match a complicated way to check if a number is odd/even:
2059 // abs (srem X, 2) --> and X, 1
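// e.g. for x == -3: srem(-3, 2) == -1 and abs(-1) == 1, which matches
// (-3 & 1) == 1.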
2060 const APInt *C;
2061 if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
2062 return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
2063
2064 break;
2065 }
2066 case Intrinsic::umin: {
2067 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2068 // umin(x, 1) == zext(x != 0)
2069 if (match(I1, m_One())) {
2070 assert(II->getType()->getScalarSizeInBits() != 1 &&
2071 "Expected simplify of umin with max constant");
2072 Value *Zero = Constant::getNullValue(I0->getType());
2073 Value *Cmp = Builder.CreateICmpNE(I0, Zero);
2074 return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
2075 }
2076 // umin(cttz(x), const) --> cttz(x | (1 << const))
2077 if (Value *FoldedCttz =
2079 I0, I1, DL, Builder))
2080 return replaceInstUsesWith(*II, FoldedCttz);
2081 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
2082 if (Value *FoldedCtlz =
2084 I0, I1, DL, Builder))
2085 return replaceInstUsesWith(*II, FoldedCtlz);
2086 [[fallthrough]];
2087 }
2088 case Intrinsic::umax: {
2089 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2090 Value *X, *Y;
2091 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
2092 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2093 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2094 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2095 }
2096 Constant *C;
2097 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2098 I0->hasOneUse()) {
2099 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType(), DL)) {
2100 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2101 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2102 }
2103 }
2104 // If C is not 0:
2105 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
2106 // If C is not 0 or 1:
2107 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
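// e.g. with C == 3: when x == 0, umax(x << 3, x + 1) is 1; otherwise the
// nuw shl guarantees (x << 3) >= x + 1, so the result is x << 3.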
2108 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
2109 const APInt *C;
2110 Value *X;
2111 if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
2112 !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
2113 return nullptr;
2114 if (C->isZero())
2115 return nullptr;
2116 if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
2117 return nullptr;
2118
2119 Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
2120 Value *NewSelect =
2121 Builder.CreateSelect(Cmp, ConstantInt::get(X->getType(), 1), A);
2122 return replaceInstUsesWith(*II, NewSelect);
2123 };
2124
2125 if (IID == Intrinsic::umax) {
2126 if (Instruction *I = foldMaxMulShift(I0, I1))
2127 return I;
2128 if (Instruction *I = foldMaxMulShift(I1, I0))
2129 return I;
2130 }
2131
2132 // If both operands of unsigned min/max are sign-extended, it is still ok
2133 // to narrow the operation.
2134 [[fallthrough]];
2135 }
2136 case Intrinsic::smax:
2137 case Intrinsic::smin: {
2138 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2139 Value *X, *Y;
2140 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
2141 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2142 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2143 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2144 }
2145
2146 Constant *C;
2147 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2148 I0->hasOneUse()) {
2149 if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType(), DL)) {
2150 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2151 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2152 }
2153 }
2154
2155 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2156 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
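// e.g. smax(smin(X, 10), 7) --> smin(smax(X, 7), 10); both forms clamp X
// into the range [7, 10].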
2157 const APInt *MinC, *MaxC;
2158 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2159 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2160 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2161 Value *NewMax = Builder.CreateBinaryIntrinsic(
2162 MaxIID, X, ConstantInt::get(X->getType(), *MaxC));
2163 return replaceInstUsesWith(
2164 *II, Builder.CreateBinaryIntrinsic(
2165 MinIID, NewMax, ConstantInt::get(X->getType(), *MinC)));
2166 };
2167 if (IID == Intrinsic::smax &&
2169 m_APInt(MinC)))) &&
2170 match(I1, m_APInt(MaxC)) && MinC->sgt(*MaxC))
2171 return CreateCanonicalClampForm(true);
2172 if (IID == Intrinsic::umax &&
2174 m_APInt(MinC)))) &&
2175 match(I1, m_APInt(MaxC)) && MinC->ugt(*MaxC))
2176 return CreateCanonicalClampForm(false);
2177
2178 // umin(i1 X, i1 Y) -> and i1 X, Y
2179 // smax(i1 X, i1 Y) -> and i1 X, Y
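// (on i1, signed true is -1, so smax yields true only when both operands are
// true; likewise umin of 0/1 values is 1 only when both are 1)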
2180 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2181 II->getType()->isIntOrIntVectorTy(1)) {
2182 return BinaryOperator::CreateAnd(I0, I1);
2183 }
2184
2185 // umax(i1 X, i1 Y) -> or i1 X, Y
2186 // smin(i1 X, i1 Y) -> or i1 X, Y
2187 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2188 II->getType()->isIntOrIntVectorTy(1)) {
2189 return BinaryOperator::CreateOr(I0, I1);
2190 }
2191
2192 // smin(smax(X, -1), 1) -> scmp(X, 0)
2193 // smax(smin(X, 1), -1) -> scmp(X, 0)
2194 // At this point, smax(smin(X, 1), -1) has already been canonicalized to
2195 // smin(smax(X, -1), 1), and the i1 cases have been changed to and/or,
2196 // so we only need to check for smin.
2197 if (IID == Intrinsic::smin) {
2198 if (match(I0, m_OneUse(m_SMax(m_Value(X), m_AllOnes()))) &&
2199 match(I1, m_One())) {
2200 Value *Zero = ConstantInt::get(X->getType(), 0);
2201 return replaceInstUsesWith(
2202 CI,
2203 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {X, Zero}));
2204 }
2205 }
2206
2207 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2208 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2209 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2210 // TODO: Canonicalize neg after min/max if I1 is constant.
2211 if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
2212 (I0->hasOneUse() || I1->hasOneUse())) {
2214 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
2215 return BinaryOperator::CreateNSWNeg(InvMaxMin);
2216 }
2217 }
2218
2219 // (umax X, (xor X, Pow2))
2220 // -> (or X, Pow2)
2221 // (umin X, (xor X, Pow2))
2222 // -> (and X, ~Pow2)
2223 // (smax X, (xor X, Pos_Pow2))
2224 // -> (or X, Pos_Pow2)
2225 // (smin X, (xor X, Pos_Pow2))
2226 // -> (and X, ~Pos_Pow2)
2227 // (smax X, (xor X, Neg_Pow2))
2228 // -> (and X, ~Neg_Pow2)
2229 // (smin X, (xor X, Neg_Pow2))
2230 // -> (or X, Neg_Pow2)
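// e.g. umax(X, X ^ 8): bit 3 differs between the two operands while all other
// bits match, so the max is X | 8 and the min is X & ~8.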
2231 if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
2232 match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
2233 isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
2234 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2235 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2236
2237 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2238 auto KnownSign = getKnownSign(X, SQ.getWithInstruction(II));
2239 if (KnownSign == std::nullopt) {
2240 UseOr = false;
2241 UseAndN = false;
2242 } else if (*KnownSign /* true is Signed. */) {
2243 UseOr ^= true;
2244 UseAndN ^= true;
2245 Type *Ty = I0->getType();
2246 // A negative power of 2 must be IntMin. It's possible to prove that a
2247 // value is negative and a power of 2 without actually having its known
2248 // bits, so just construct the value by hand.
2250 Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
2251 }
2252 }
2253 if (UseOr)
2254 return BinaryOperator::CreateOr(I0, X);
2255 else if (UseAndN)
2256 return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
2257 }
2258
2259 // If we can eliminate ~A and Y is free to invert:
2260 // max ~A, Y --> ~(min A, ~Y)
2261 //
2262 // Examples:
2263 // max ~A, ~Y --> ~(min A, Y)
2264 // max ~A, C --> ~(min A, ~C)
2265 // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
2266 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2267 Value *A;
2268 if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
2269 !isFreeToInvert(A, A->hasOneUse())) {
2270 if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
2272 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
2273 return BinaryOperator::CreateNot(InvMaxMin);
2274 }
2275 }
2276 return nullptr;
2277 };
2278
2279 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2280 return I;
2281 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2282 return I;
2283
2285 return I;
2286
2287 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
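// e.g. umin(X & -8, Y & -8) --> umin(X, Y) & -8; clearing the low bits rounds
// both operands down monotonically, so it commutes with the unsigned min.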
2288 const APInt *RHSC;
2289 if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
2290 match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
2291 return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
2292 ConstantInt::get(II->getType(), *RHSC));
2293
2294 // smax(X, -X) --> abs(X)
2295 // smin(X, -X) --> -abs(X)
2296 // umax(X, -X) --> -abs(X)
2297 // umin(X, -X) --> abs(X)
2298 if (isKnownNegation(I0, I1)) {
2299 // We can choose either operand as the input to abs(), but if we can
2300 // eliminate the only use of a value, that's better for subsequent
2301 // transforms/analysis.
2302 if (I0->hasOneUse() && !I1->hasOneUse())
2303 std::swap(I0, I1);
2304
2305 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2306 // operation and potentially its negation.
2307 bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
2308 Value *Abs = Builder.CreateBinaryIntrinsic(
2309 Intrinsic::abs, I0,
2310 ConstantInt::getBool(II->getContext(), IntMinIsPoison));
2311
2312 // We don't have a "nabs" intrinsic, so negate if needed based on the
2313 // max/min operation.
2314 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2315 Abs = Builder.CreateNeg(Abs, "nabs", IntMinIsPoison);
2316 return replaceInstUsesWith(CI, Abs);
2317 }
2318
2320 return Sel;
2321
2322 if (Instruction *SAdd = matchSAddSubSat(*II))
2323 return SAdd;
2324
2325 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2326 return replaceInstUsesWith(*II, NewMinMax);
2327
2329 return R;
2330
2331 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2332 return NewMinMax;
2333
2334 // Try to fold minmax with constant RHS based on range information
2335 if (match(I1, m_APIntAllowPoison(RHSC))) {
2336 ICmpInst::Predicate Pred =
2338 bool IsSigned = MinMaxIntrinsic::isSigned(IID);
2340 I0, IsSigned, SQ.getWithInstruction(II));
2341 if (!LHS_CR.isFullSet()) {
2342 if (LHS_CR.icmp(Pred, *RHSC))
2343 return replaceInstUsesWith(*II, I0);
2344 if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
2345 return replaceInstUsesWith(*II,
2346 ConstantInt::get(II->getType(), *RHSC));
2347 }
2348 }
2349
2351 return replaceInstUsesWith(*II, V);
2352
2353 break;
2354 }
2355 case Intrinsic::scmp: {
2356 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2357 Value *LHS, *RHS;
2358 if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
2359 return replaceInstUsesWith(
2360 CI,
2361 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
2362 break;
2363 }
2364 case Intrinsic::bitreverse: {
2365 Value *IIOperand = II->getArgOperand(0);
2366 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2367 Value *X;
2368 if (match(IIOperand, m_ZExt(m_Value(X))) &&
2369 X->getType()->isIntOrIntVectorTy(1)) {
2370 Type *Ty = II->getType();
2371 APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
2372 return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
2374 }
2375
2376 if (Instruction *crossLogicOpFold =
2378 return crossLogicOpFold;
2379
2380 break;
2381 }
2382 case Intrinsic::bswap: {
2383 Value *IIOperand = II->getArgOperand(0);
2384
2385 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2386 // inverse-shift-of-bswap:
2387 // bswap (shl X, Y) --> lshr (bswap X), Y
2388 // bswap (lshr X, Y) --> shl (bswap X), Y
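// e.g. for i32: bswap(x << 16) == bswap(x) >> 16, since reversing the bytes
// turns a left shift by whole bytes into a right shift by the same amount.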
2389 Value *X, *Y;
2390 if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
2391 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2393 Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
2394 BinaryOperator::BinaryOps InverseShift =
2395 cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
2396 ? Instruction::LShr
2397 : Instruction::Shl;
2398 return BinaryOperator::Create(InverseShift, NewSwap, Y);
2399 }
2400 }
2401
2402 KnownBits Known = computeKnownBits(IIOperand, II);
2403 uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
2404 uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
2405 unsigned BW = Known.getBitWidth();
2406
2407 // bswap(x) -> shift(x) if x has exactly one "active byte"
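// e.g. for i32 where only bits 16..23 may be nonzero (LZ >= 8, TZ >= 16):
// bswap moves that byte from byte 2 to byte 1, which is exactly x >> 8.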
2408 if (BW - LZ - TZ == 8) {
2409 assert(LZ != TZ && "active byte cannot be in the middle");
2410 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2411 return BinaryOperator::CreateNUWShl(
2412 IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
2413 // -> lshr(x) if the "active byte" is in the high part of x
2414 return BinaryOperator::CreateExactLShr(
2415 IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
2416 }
2417
2418 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
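// e.g. for i32 -> i16: both sides keep the top two bytes of x in their
// original order, i.e. trunc(x >> 16) with c == 16.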
2419 if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
2420 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2421 Value *CV = ConstantInt::get(X->getType(), C);
2422 Value *V = Builder.CreateLShr(X, CV);
2423 return new TruncInst(V, IIOperand->getType());
2424 }
2425
2426 if (Instruction *crossLogicOpFold =
2428 return crossLogicOpFold;
2429 }
2430
2431 // Try to fold into bitreverse if bswap is the root of the expression tree.
2432 if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
2433 /*MatchBitReversals*/ true))
2434 return BitOp;
2435 break;
2436 }
2437 case Intrinsic::masked_load:
2438 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
2439 return replaceInstUsesWith(CI, SimplifiedMaskedOp);
2440 break;
2441 case Intrinsic::masked_store:
2442 return simplifyMaskedStore(*II);
2443 case Intrinsic::masked_gather:
2444 return simplifyMaskedGather(*II);
2445 case Intrinsic::masked_scatter:
2446 return simplifyMaskedScatter(*II);
2447 case Intrinsic::launder_invariant_group:
2448 case Intrinsic::strip_invariant_group:
2449 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
2450 return replaceInstUsesWith(*II, SkippedBarrier);
2451 break;
2452 case Intrinsic::powi:
2453 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2454 // 0 and 1 are handled in instsimplify
2455 // powi(x, -1) -> 1/x
2456 if (Power->isMinusOne())
2457 return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
2458 II->getArgOperand(0), II);
2459 // powi(x, 2) -> x*x
2460 if (Power->equalsInt(2))
2461 return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
2462 II->getArgOperand(0), II);
2463
2464 if (!Power->getValue()[0]) {
2465 Value *X;
2466 // If power is even:
2467 // powi(-x, p) -> powi(x, p)
2468 // powi(fabs(x), p) -> powi(x, p)
2469 // powi(copysign(x, y), p) -> powi(x, p)
2470 if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
2471 match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
2472 match(II->getArgOperand(0),
2474 return replaceOperand(*II, 0, X);
2475 }
2476 }
2477 break;
2478
2479 case Intrinsic::cttz:
2480 case Intrinsic::ctlz:
2481 if (auto *I = foldCttzCtlz(*II, *this))
2482 return I;
2483 break;
2484
2485 case Intrinsic::ctpop:
2486 if (auto *I = foldCtpop(*II, *this))
2487 return I;
2488 break;
2489
2490 case Intrinsic::fshl:
2491 case Intrinsic::fshr: {
2492 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
2493 Type *Ty = II->getType();
2494 unsigned BitWidth = Ty->getScalarSizeInBits();
2495 Constant *ShAmtC;
2496 if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
2497 // Canonicalize a shift amount constant operand to modulo the bit-width.
2498 Constant *WidthC = ConstantInt::get(Ty, BitWidth);
2499 Constant *ModuloC =
2500 ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
2501 if (!ModuloC)
2502 return nullptr;
2503 if (ModuloC != ShAmtC)
2504 return replaceOperand(*II, 2, ModuloC);
2505
2507 ShAmtC, DL),
2508 m_One()) &&
2509 "Shift amount expected to be modulo bitwidth");
2510
2511 // Canonicalize funnel shift right by constant to funnel shift left. This
2512 // is not entirely arbitrary. For historical reasons, the backend may
2513 // recognize rotate left patterns but miss rotate right patterns.
2514 if (IID == Intrinsic::fshr) {
2515 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
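// e.g. for i32: fshr(X, Y, 8) == fshl(X, Y, 24); both select the same 32
// contiguous bits of the concatenation X:Y.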
2516 if (!isKnownNonZero(ShAmtC, SQ.getWithInstruction(II)))
2517 return nullptr;
2518
2519 Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
2520 Module *Mod = II->getModule();
2521 Function *Fshl =
2522 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
2523 return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
2524 }
2525 assert(IID == Intrinsic::fshl &&
2526 "All funnel shifts by simple constants should go left");
2527
2528 // fshl(X, 0, C) --> shl X, C
2529 // fshl(X, undef, C) --> shl X, C
2530 if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
2531 return BinaryOperator::CreateShl(Op0, ShAmtC);
2532
2533 // fshl(0, X, C) --> lshr X, (BW-C)
2534 // fshl(undef, X, C) --> lshr X, (BW-C)
2535 if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
2536 return BinaryOperator::CreateLShr(Op1,
2537 ConstantExpr::getSub(WidthC, ShAmtC));
2538
2539 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2540 if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
2541 Module *Mod = II->getModule();
2542 Function *Bswap =
2543 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
2544 return CallInst::Create(Bswap, { Op0 });
2545 }
2546 if (Instruction *BitOp =
2547 matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
2548 /*MatchBitReversals*/ true))
2549 return BitOp;
2550
2551 // R = fshl(X, X, C2)
2552 // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
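// e.g. for i8: fshl(fshl(x, x, 3), fshl(x, x, 3), 6) rotates by
// (3 + 6) % 8 == 1, i.e. it is fshl(x, x, 1).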
2553 Value *InnerOp;
2554 const APInt *ShAmtInnerC, *ShAmtOuterC;
2555 if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
2556 m_APInt(ShAmtInnerC))) &&
2557 match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) {
2558 APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
2559 APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
2560 if (Modulo.isZero())
2561 return replaceInstUsesWith(*II, InnerOp);
2562 Constant *ModuloC = ConstantInt::get(Ty, Modulo);
2564 {InnerOp, InnerOp, ModuloC});
2565 }
2566 }
2567
2568 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2569 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2570 // if BitWidth is a power-of-2
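// e.g. for i32: rotating left by -n equals rotating right by n, because the
// shift amount is interpreted modulo 32.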
2571 Value *Y;
2572 if (Op0 == Op1 && isPowerOf2_32(BitWidth) &&
2573 match(II->getArgOperand(2), m_Neg(m_Value(Y)))) {
2574 Module *Mod = II->getModule();
2576 Mod, IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Ty);
2577 return CallInst::Create(OppositeShift, {Op0, Op1, Y});
2578 }
2579
2580 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2581 // power-of-2
2582 if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
2583 match(Op1, m_ZeroInt())) {
2584 Value *Op2 = II->getArgOperand(2);
2585 Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
2586 return BinaryOperator::CreateShl(Op0, And);
2587 }
2588
2589 // Left or right might be masked.
2591 return &CI;
2592
2593 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2594 // so only the low bits of the shift amount are demanded if the bitwidth is
2595 // a power-of-2.
2596 if (!isPowerOf2_32(BitWidth))
2597 break;
2599 KnownBits Op2Known(BitWidth);
2600 if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
2601 return &CI;
2602 break;
2603 }
2604 case Intrinsic::ptrmask: {
2605 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2606 KnownBits Known(BitWidth);
2608 return II;
2609
2610 Value *InnerPtr, *InnerMask;
2611 bool Changed = false;
2612 // Combine:
2613 // (ptrmask (ptrmask p, A), B)
2614 // -> (ptrmask p, (and A, B))
2615 if (match(II->getArgOperand(0),
2617 m_Value(InnerMask))))) {
2618 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2619 "Mask types must match");
2620 // TODO: If InnerMask == Op1, we could copy attributes from inner
2621 // callsite -> outer callsite.
2622 Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
2623 replaceOperand(CI, 0, InnerPtr);
2624 replaceOperand(CI, 1, NewMask);
2625 Changed = true;
2626 }
2627
2628 // See if we can deduce non-null.
2629 if (!CI.hasRetAttr(Attribute::NonNull) &&
2630 (Known.isNonZero() ||
2631 isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
2632 CI.addRetAttr(Attribute::NonNull);
2633 Changed = true;
2634 }
2635
2636 unsigned NewAlignmentLog =
2638 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
2639 // Known bits will capture whether we had alignment information associated
2640 // with the pointer argument.
2641 if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
2643 CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
2644 Changed = true;
2645 }
2646 if (Changed)
2647 return &CI;
2648 break;
2649 }
2650 case Intrinsic::uadd_with_overflow:
2651 case Intrinsic::sadd_with_overflow: {
2652 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2653 return I;
2654
2655 // Given 2 constant operands whose sum does not overflow:
2656 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2657 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2658 Value *X;
2659 const APInt *C0, *C1;
2660 Value *Arg0 = II->getArgOperand(0);
2661 Value *Arg1 = II->getArgOperand(1);
2662 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2663 bool HasNWAdd = IsSigned
2664 ? match(Arg0, m_NSWAddLike(m_Value(X), m_APInt(C0)))
2665 : match(Arg0, m_NUWAddLike(m_Value(X), m_APInt(C0)));
2666 if (HasNWAdd && match(Arg1, m_APInt(C1))) {
2667 bool Overflow;
2668 APInt NewC =
2669 IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
2670 if (!Overflow)
2671 return replaceInstUsesWith(
2672 *II, Builder.CreateBinaryIntrinsic(
2673 IID, X, ConstantInt::get(Arg1->getType(), NewC)));
2674 }
2675 break;
2676 }
2677
2678 case Intrinsic::umul_with_overflow:
2679 case Intrinsic::smul_with_overflow:
2680 case Intrinsic::usub_with_overflow:
2681 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2682 return I;
2683 break;
2684
2685 case Intrinsic::ssub_with_overflow: {
2686 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2687 return I;
2688
2689 Constant *C;
2690 Value *Arg0 = II->getArgOperand(0);
2691 Value *Arg1 = II->getArgOperand(1);
2692 // Given a constant C that is not the minimum signed value
2693 // for an integer of a given bit width:
2694 //
2695 // ssubo X, C -> saddo X, -C
2696 if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
2697 Value *NegVal = ConstantExpr::getNeg(C);
2698 // Build a saddo call that is equivalent to the discovered
2699 // ssubo call.
2700 return replaceInstUsesWith(
2701 *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2702 Arg0, NegVal));
2703 }
2704
2705 break;
2706 }
2707
2708 case Intrinsic::uadd_sat:
2709 case Intrinsic::sadd_sat:
2710 case Intrinsic::usub_sat:
2711 case Intrinsic::ssub_sat: {
2713 Type *Ty = SI->getType();
2714 Value *Arg0 = SI->getLHS();
2715 Value *Arg1 = SI->getRHS();
2716
2717 // Make use of known overflow information.
2718 OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
2719 Arg0, Arg1, SI);
2720 switch (OR) {
2722 break;
2724 if (SI->isSigned())
2725 return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
2726 else
2727 return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
2729 unsigned BitWidth = Ty->getScalarSizeInBits();
2730 APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
2731 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
2732 }
2734 unsigned BitWidth = Ty->getScalarSizeInBits();
2735 APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
2736 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
2737 }
2738 }
2739
2740 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2741 // which then folds further to:
2742 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2743 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
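// e.g. usub_sat((100 -nuw A), 30) --> usub_sat(70, A): both are 70 - A when
// A <= 70 and 0 otherwise (the nuw sub guarantees A <= 100).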
2744 Constant *C, *C1;
2745 Value *A;
2746 if (IID == Intrinsic::usub_sat &&
2747 match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
2748 match(Arg1, m_ImmConstant(C1))) {
2749 auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
2750 auto *NewSub =
2751 Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
2752 return replaceInstUsesWith(*SI, NewSub);
2753 }
2754
2755 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2756 if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2757 C->isNotMinSignedValue()) {
2758 Value *NegVal = ConstantExpr::getNeg(C);
2759 return replaceInstUsesWith(
2760 *II, Builder.CreateBinaryIntrinsic(
2761 Intrinsic::sadd_sat, Arg0, NegVal));
2762 }
2763
2764 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2765 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2766 // if Val and Val2 have the same sign
2767 if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
2768 Value *X;
2769 const APInt *Val, *Val2;
2770 APInt NewVal;
2771 bool IsUnsigned =
2772 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2773 if (Other->getIntrinsicID() == IID &&
2774 match(Arg1, m_APInt(Val)) &&
2775 match(Other->getArgOperand(0), m_Value(X)) &&
2776 match(Other->getArgOperand(1), m_APInt(Val2))) {
2777 if (IsUnsigned)
2778 NewVal = Val->uadd_sat(*Val2);
2779 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2780 bool Overflow;
2781 NewVal = Val->sadd_ov(*Val2, Overflow);
2782 if (Overflow) {
2783 // Both adds together may add more than SignedMaxValue
2784 // without saturating the final result.
2785 break;
2786 }
2787 } else {
2788 // Cannot fold saturated addition with different signs.
2789 break;
2790 }
2791
2792 return replaceInstUsesWith(
2793 *II, Builder.CreateBinaryIntrinsic(
2794 IID, X, ConstantInt::get(II->getType(), NewVal)));
2795 }
2796 }
2797 break;
2798 }
2799
2800 case Intrinsic::minnum:
2801 case Intrinsic::maxnum:
2802 case Intrinsic::minimum:
2803 case Intrinsic::maximum: {
2804 Value *Arg0 = II->getArgOperand(0);
2805 Value *Arg1 = II->getArgOperand(1);
2806 Value *X, *Y;
2807 if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2808 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2809 // If both operands are negated, invert the call and negate the result:
2810 // min(-X, -Y) --> -(max(X, Y))
2811 // max(-X, -Y) --> -(min(X, Y))
2812 Intrinsic::ID NewIID;
2813 switch (IID) {
2814 case Intrinsic::maxnum:
2815 NewIID = Intrinsic::minnum;
2816 break;
2817 case Intrinsic::minnum:
2818 NewIID = Intrinsic::maxnum;
2819 break;
2820 case Intrinsic::maximum:
2821 NewIID = Intrinsic::minimum;
2822 break;
2823 case Intrinsic::minimum:
2824 NewIID = Intrinsic::maximum;
2825 break;
2826 default:
2827 llvm_unreachable("unexpected intrinsic ID");
2828 }
2829 Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
2830 Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
2831 FNeg->copyIRFlags(II);
2832 return FNeg;
2833 }
2834
2835 // m(m(X, C2), C1) -> m(X, C)
2836 const APFloat *C1, *C2;
2837 if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
2838 if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
2839 ((match(M->getArgOperand(0), m_Value(X)) &&
2840 match(M->getArgOperand(1), m_APFloat(C2))) ||
2841 (match(M->getArgOperand(1), m_Value(X)) &&
2842 match(M->getArgOperand(0), m_APFloat(C2))))) {
2843 APFloat Res(0.0);
2844 switch (IID) {
2845 case Intrinsic::maxnum:
2846 Res = maxnum(*C1, *C2);
2847 break;
2848 case Intrinsic::minnum:
2849 Res = minnum(*C1, *C2);
2850 break;
2851 case Intrinsic::maximum:
2852 Res = maximum(*C1, *C2);
2853 break;
2854 case Intrinsic::minimum:
2855 Res = minimum(*C1, *C2);
2856 break;
2857 default:
2858 llvm_unreachable("unexpected intrinsic ID");
2859 }
2860 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2861 // was a simplification (so Arg0 and its original flags could
2862 // propagate?)
2863 Value *V = Builder.CreateBinaryIntrinsic(
2864 IID, X, ConstantFP::get(Arg0->getType(), Res),
2866 return replaceInstUsesWith(*II, V);
2867 }
2868 }
2869
2870 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2871 if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
2872 match(Arg1, m_OneUse(m_FPExt(m_Value(Y)))) &&
2873 X->getType() == Y->getType()) {
2874 Value *NewCall =
2875 Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
2876 return new FPExtInst(NewCall, II->getType());
2877 }
2878
2879 // max X, -X --> fabs X
2880 // min X, -X --> -(fabs X)
2881 // TODO: Remove the one-use limitation? That is obviously better for max,
2882 // which is why we don't check for one-use there. However,
2883 // it would be an extra instruction for min (fnabs), but
2884 // that is still likely better for analysis and codegen.
2885 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
2886 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Specific(X)))
2887 return Op0->hasOneUse() ||
2888 (IID != Intrinsic::minimum && IID != Intrinsic::minnum);
2889 return false;
2890 };
2891
2892 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
2893 Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
2894 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum)
2895 R = Builder.CreateFNegFMF(R, II);
2896 return replaceInstUsesWith(*II, R);
2897 }
2898
2899 break;
2900 }
2901 case Intrinsic::matrix_multiply: {
2902 // Optimize negation in matrix multiplication.
2903
2904 // -A * -B -> A * B
2905 Value *A, *B;
2906 if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
2907 match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
2908 replaceOperand(*II, 0, A);
2909 replaceOperand(*II, 1, B);
2910 return II;
2911 }
2912
2913 Value *Op0 = II->getOperand(0);
2914 Value *Op1 = II->getOperand(1);
2915 Value *OpNotNeg, *NegatedOp;
2916 unsigned NegatedOpArg, OtherOpArg;
2917 if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
2918 NegatedOp = Op0;
2919 NegatedOpArg = 0;
2920 OtherOpArg = 1;
2921 } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
2922 NegatedOp = Op1;
2923 NegatedOpArg = 1;
2924 OtherOpArg = 0;
2925 } else
2926 // Multiplication doesn't have a negated operand.
2927 break;
2928
2929 // Only optimize if the negated operand has only one use.
2930 if (!NegatedOp->hasOneUse())
2931 break;
2932
2933 Value *OtherOp = II->getOperand(OtherOpArg);
2934 VectorType *RetTy = cast<VectorType>(II->getType());
2935 VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
2936 VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
2937 ElementCount NegatedCount = NegatedOpTy->getElementCount();
2938 ElementCount OtherCount = OtherOpTy->getElementCount();
2939 ElementCount RetCount = RetTy->getElementCount();
2940 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
2941 if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
2942 ElementCount::isKnownLT(OtherCount, RetCount)) {
2943 Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
2944 replaceOperand(*II, NegatedOpArg, OpNotNeg);
2945 replaceOperand(*II, OtherOpArg, InverseOtherOp);
2946 return II;
2947 }
2948 // (-A) * B -> -(A * B), if it is cheaper to negate the result
2949 if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
2950 SmallVector<Value *, 5> NewArgs(II->args());
2951 NewArgs[NegatedOpArg] = OpNotNeg;
2952 Instruction *NewMul =
2953 Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
2954 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
2955 }
2956 break;
2957 }
2958 case Intrinsic::fmuladd: {
2959 // Try to simplify the underlying FMul.
2960 if (Value *V =
2961 simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
2962 II->getFastMathFlags(), SQ.getWithInstruction(II)))
2963 return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
2964 II->getFastMathFlags());
2965
2966 [[fallthrough]];
2967 }
2968 case Intrinsic::fma: {
2969 // fma fneg(x), fneg(y), z -> fma x, y, z
2970 Value *Src0 = II->getArgOperand(0);
2971 Value *Src1 = II->getArgOperand(1);
2972 Value *Src2 = II->getArgOperand(2);
2973 Value *X, *Y;
2974 if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
2975 replaceOperand(*II, 0, X);
2976 replaceOperand(*II, 1, Y);
2977 return II;
2978 }
2979
2980 // fma fabs(x), fabs(x), z -> fma x, x, z
2981 if (match(Src0, m_FAbs(m_Value(X))) &&
2982 match(Src1, m_FAbs(m_Specific(X)))) {
2983 replaceOperand(*II, 0, X);
2984 replaceOperand(*II, 1, X);
2985 return II;
2986 }
2987
2988 // Try to simplify the underlying FMul. We can only apply simplifications
2989 // that do not require rounding.
2990 if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
2991 SQ.getWithInstruction(II)))
2992 return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
2993
2994 // fma x, y, 0 -> fmul x, y
2995 // This is always valid for -0.0, but requires nsz for +0.0 as
2996 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
2997 if (match(Src2, m_NegZeroFP()) ||
2998 (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
2999 return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
3000
3001 // fma x, -1.0, y -> fsub y, x
3002 if (match(Src1, m_SpecificFP(-1.0)))
3003 return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
3004
3005 break;
3006 }
3007 case Intrinsic::copysign: {
3008 Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
3009 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
3010 Sign, getSimplifyQuery().getWithInstruction(II))) {
3011 if (*KnownSignBit) {
3012 // If we know that the sign argument is negative, reduce to FNABS:
3013 // copysign Mag, -Sign --> fneg (fabs Mag)
3014 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
3015 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
3016 }
3017
3018 // If we know that the sign argument is positive, reduce to FABS:
3019 // copysign Mag, +Sign --> fabs Mag
3020 Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
3021 return replaceInstUsesWith(*II, Fabs);
3022 }
3023
3024 // Propagate sign argument through nested calls:
3025 // copysign Mag, (copysign ?, X) --> copysign Mag, X
3026 Value *X;
3028 Value *CopySign =
3029 Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
3030 return replaceInstUsesWith(*II, CopySign);
3031 }
3032
3033 // Clear sign-bit of constant magnitude:
3034 // copysign -MagC, X --> copysign MagC, X
3035 // TODO: Support constant folding for fabs
3036 const APFloat *MagC;
3037 if (match(Mag, m_APFloat(MagC)) && MagC->isNegative()) {
3038 APFloat PosMagC = *MagC;
3039 PosMagC.clearSign();
3040 return replaceOperand(*II, 0, ConstantFP::get(Mag->getType(), PosMagC));
3041 }
3042
3043 // Peek through changes of magnitude's sign-bit. This call rewrites those:
3044 // copysign (fabs X), Sign --> copysign X, Sign
3045 // copysign (fneg X), Sign --> copysign X, Sign
3046 if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
3047 return replaceOperand(*II, 0, X);
3048
3049 Type *SignEltTy = Sign->getType()->getScalarType();
3050
3051 Value *CastSrc;
3052 if (match(Sign,
3054 CastSrc->getType()->isIntOrIntVectorTy() &&
3056 KnownBits Known(SignEltTy->getPrimitiveSizeInBits());
3058 APInt::getSignMask(Known.getBitWidth()), Known,
3059 SQ))
3060 return II;
3061 }
3062
3063 break;
3064 }
3065 case Intrinsic::fabs: {
3066 Value *Cond, *TVal, *FVal;
3067 Value *Arg = II->getArgOperand(0);
3068 Value *X;
3069 // fabs (-X) --> fabs (X)
3070 if (match(Arg, m_FNeg(m_Value(X)))) {
3071 CallInst *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
3072 return replaceInstUsesWith(CI, Fabs);
3073 }
3074
3075 if (match(Arg, m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
3076 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
3077 if (Arg->hasOneUse() ? (isa<Constant>(TVal) || isa<Constant>(FVal))
3078 : (isa<Constant>(TVal) && isa<Constant>(FVal))) {
3079 CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
3080 CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
3081 SelectInst *SI = SelectInst::Create(Cond, AbsT, AbsF);
3082 FastMathFlags FMF1 = II->getFastMathFlags();
3083 FastMathFlags FMF2 = cast<SelectInst>(Arg)->getFastMathFlags();
3084 FMF2.setNoSignedZeros(false);
3085 SI->setFastMathFlags(FMF1 | FMF2);
3086 return SI;
3087 }
3088 // fabs (select Cond, -FVal, FVal) --> fabs FVal
3089 if (match(TVal, m_FNeg(m_Specific(FVal))))
3090 return replaceOperand(*II, 0, FVal);
3091 // fabs (select Cond, TVal, -TVal) --> fabs TVal
3092 if (match(FVal, m_FNeg(m_Specific(TVal))))
3093 return replaceOperand(*II, 0, TVal);
3094 }
3095
3096 Value *Magnitude, *Sign;
3097 if (match(II->getArgOperand(0),
3098 m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
3099 // fabs (copysign x, y) -> (fabs x)
3100 CallInst *AbsSign =
3101 Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Magnitude, II);
3102 return replaceInstUsesWith(*II, AbsSign);
3103 }
3104
3105 [[fallthrough]];
3106 }
3107 case Intrinsic::ceil:
3108 case Intrinsic::floor:
3109 case Intrinsic::round:
3110 case Intrinsic::roundeven:
3111 case Intrinsic::nearbyint:
3112 case Intrinsic::rint:
3113 case Intrinsic::trunc: {
3114 Value *ExtSrc;
3115 if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
3116 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
3117 Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
3118 return new FPExtInst(NarrowII, II->getType());
3119 }
3120 break;
3121 }
3122 case Intrinsic::cos:
3123 case Intrinsic::amdgcn_cos:
3124 case Intrinsic::cosh: {
3125 Value *X, *Sign;
3126 Value *Src = II->getArgOperand(0);
3127 if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X))) ||
3128 match(Src, m_CopySign(m_Value(X), m_Value(Sign)))) {
3129 // f(-x) --> f(x)
3130 // f(fabs(x)) --> f(x)
3131 // f(copysign(x, y)) --> f(x)
3132 // for f in {cos, cosh}
3133 return replaceOperand(*II, 0, X);
3134 }
3135 break;
3136 }
3137 case Intrinsic::sin:
3138 case Intrinsic::amdgcn_sin:
3139 case Intrinsic::sinh:
3140 case Intrinsic::tan:
3141 case Intrinsic::tanh: {
3142 Value *X;
3143 if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
3144 // f(-x) --> -f(x)
3145 // for f in {sin, sinh, tan, tanh}
3146 Value *NewFunc = Builder.CreateUnaryIntrinsic(IID, X, II);
3147 return UnaryOperator::CreateFNegFMF(NewFunc, II);
3148 }
3149 break;
3150 }
3151 case Intrinsic::ldexp: {
3152 // ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
3153 //
3154 // The danger is if the first ldexp would overflow to infinity or underflow
3155 // to zero, but the combined exponent avoids it. We ignore this with
3156 // reassoc.
3157 //
3158 // It's also safe to fold if we know both exponents are >= 0 or <= 0 since
3159 // it would just double down on the overflow/underflow which would occur
3160 // anyway.
3161 //
3162 // TODO: Could do better if we had range tracking for the input value
3163 // exponent. Also could broaden sign check to cover == 0 case.
3164 Value *Src = II->getArgOperand(0);
3165 Value *Exp = II->getArgOperand(1);
3166
3167 uint64_t ConstExp;
3168 if (match(Exp, m_ConstantInt(ConstExp))) {
3169 // ldexp(x, K) -> fmul x, 2^K
3170 const fltSemantics &FPTy =
3171 Src->getType()->getScalarType()->getFltSemantics();
3172
3173 APFloat Scaled = scalbn(APFloat::getOne(FPTy), static_cast<int>(ConstExp),
3175 if (!Scaled.isZero() && !Scaled.isInfinity()) {
3176 // Skip overflow and underflow cases.
3177 Constant *FPConst = ConstantFP::get(Src->getType(), Scaled);
3178 return BinaryOperator::CreateFMulFMF(Src, FPConst, II);
3179 }
3180 }
3181
3182 Value *InnerSrc;
3183 Value *InnerExp;
3185 m_Value(InnerSrc), m_Value(InnerExp)))) &&
3186 Exp->getType() == InnerExp->getType()) {
3187 FastMathFlags FMF = II->getFastMathFlags();
3188 FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
3189
3190 if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
3191 signBitMustBeTheSame(Exp, InnerExp, SQ.getWithInstruction(II))) {
3192 // TODO: Add nsw/nuw probably safe if integer type exceeds exponent
3193 // width.
3194 Value *NewExp = Builder.CreateAdd(InnerExp, Exp);
3195 II->setArgOperand(1, NewExp);
3196 II->setFastMathFlags(InnerFlags); // Or the inner flags.
3197 return replaceOperand(*II, 0, InnerSrc);
3198 }
3199 }
3200
3201 // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3202 // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
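    // Rationale: a zext'd i1 exponent is 0 or 1, so the scale factor is 2^0
    // or 2^1; a sext'd i1 exponent is 0 or -1, so the scale is 2^0 or 2^-1
    // (i.e. 0.5).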
3203 Value *ExtSrc;
3204 if (match(Exp, m_ZExt(m_Value(ExtSrc))) &&
3205 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3206 Value *Select =
3207 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0),
3208 ConstantFP::get(II->getType(), 1.0));
3209       return BinaryOperator::CreateFMulFMF(Src, Select, II);
3210     }
3211 if (match(Exp, m_SExt(m_Value(ExtSrc))) &&
3212 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3213 Value *Select =
3214 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5),
3215 ConstantFP::get(II->getType(), 1.0));
3216       return BinaryOperator::CreateFMulFMF(Src, Select, II);
3217     }
3218
3219 // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3220 // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3221     //
3222 // TODO: If we cared, should insert a canonicalize for x
3223 Value *SelectCond, *SelectLHS, *SelectRHS;
3224 if (match(II->getArgOperand(1),
3225 m_OneUse(m_Select(m_Value(SelectCond), m_Value(SelectLHS),
3226 m_Value(SelectRHS))))) {
3227 Value *NewLdexp = nullptr;
3228 Value *Select = nullptr;
3229 if (match(SelectRHS, m_ZeroInt())) {
3230 NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
3231 Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
3232 } else if (match(SelectLHS, m_ZeroInt())) {
3233 NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
3234 Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
3235 }
3236
3237 if (NewLdexp) {
3238 Select->takeName(II);
3239 return replaceInstUsesWith(*II, Select);
3240 }
3241 }
3242
3243 break;
3244 }
3245 case Intrinsic::ptrauth_auth:
3246 case Intrinsic::ptrauth_resign: {
3247 // We don't support this optimization on intrinsic calls with deactivation
3248 // symbols, which are represented using operand bundles.
3249 if (II->hasOperandBundles())
3250 break;
3251
3252 // (sign|resign) + (auth|resign) can be folded by omitting the middle
3253 // sign+auth component if the key and discriminator match.
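    // For instance, roughly:
    //   %t = call i64 @llvm.ptrauth.sign(i64 %p, i32 K, i64 %d)
    //   %r = call i64 @llvm.ptrauth.auth(i64 %t, i32 K, i64 %d)
    // can be replaced by %p, since signing and then authenticating with the
    // same key and discriminator is a no-op.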
3254 bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3255 Value *Ptr = II->getArgOperand(0);
3256 Value *Key = II->getArgOperand(1);
3257 Value *Disc = II->getArgOperand(2);
3258
3259 // AuthKey will be the key we need to end up authenticating against in
3260 // whatever we replace this sequence with.
3261 Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
3262 if (const auto *CI = dyn_cast<CallBase>(Ptr)) {
3263 // We don't support this optimization on intrinsic calls with deactivation
3264 // symbols, which are represented using operand bundles.
3265 if (CI->hasOperandBundles())
3266 break;
3267
3268 BasePtr = CI->getArgOperand(0);
3269 if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3270 if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
3271 break;
3272 } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3273 if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
3274 break;
3275 AuthKey = CI->getArgOperand(1);
3276 AuthDisc = CI->getArgOperand(2);
3277 } else
3278 break;
3279 } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Ptr)) {
3280 // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3281 // our purposes, so check for that too.
3282 const auto *CPA = dyn_cast<ConstantPtrAuth>(PtrToInt->getOperand(0));
3283 if (!CPA || !CPA->isKnownCompatibleWith(Key, Disc, DL))
3284 break;
3285
3286 // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3287 if (NeedSign && isa<ConstantInt>(II->getArgOperand(4))) {
3288 auto *SignKey = cast<ConstantInt>(II->getArgOperand(3));
3289 auto *SignDisc = cast<ConstantInt>(II->getArgOperand(4));
3290 auto *Null = ConstantPointerNull::get(Builder.getPtrTy());
3291 auto *NewCPA = ConstantPtrAuth::get(CPA->getPointer(), SignKey,
3292 SignDisc, /*AddrDisc=*/Null,
3293 /*DeactivationSymbol=*/Null);
3294         replaceInstUsesWith(
3295             *II, ConstantExpr::getPointerCast(NewCPA, II->getType()));
3296 return eraseInstFromFunction(*II);
3297 }
3298
3299 // auth(ptrauth(p,k,d),k,d) -> p
3300 BasePtr = Builder.CreatePtrToInt(CPA->getPointer(), II->getType());
3301 } else
3302 break;
3303
3304 unsigned NewIntrin;
3305 if (AuthKey && NeedSign) {
3306 // resign(0,1) + resign(1,2) = resign(0, 2)
3307 NewIntrin = Intrinsic::ptrauth_resign;
3308 } else if (AuthKey) {
3309 // resign(0,1) + auth(1) = auth(0)
3310 NewIntrin = Intrinsic::ptrauth_auth;
3311 } else if (NeedSign) {
3312 // sign(0) + resign(0, 1) = sign(1)
3313 NewIntrin = Intrinsic::ptrauth_sign;
3314 } else {
3315 // sign(0) + auth(0) = nop
3316 replaceInstUsesWith(*II, BasePtr);
3317 return eraseInstFromFunction(*II);
3318 }
3319
3320 SmallVector<Value *, 4> CallArgs;
3321 CallArgs.push_back(BasePtr);
3322 if (AuthKey) {
3323 CallArgs.push_back(AuthKey);
3324 CallArgs.push_back(AuthDisc);
3325 }
3326
3327 if (NeedSign) {
3328 CallArgs.push_back(II->getArgOperand(3));
3329 CallArgs.push_back(II->getArgOperand(4));
3330 }
3331
3332 Function *NewFn =
3333 Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
3334 return CallInst::Create(NewFn, CallArgs);
3335 }
3336 case Intrinsic::arm_neon_vtbl1:
3337 case Intrinsic::arm_neon_vtbl2:
3338 case Intrinsic::arm_neon_vtbl3:
3339 case Intrinsic::arm_neon_vtbl4:
3340 case Intrinsic::aarch64_neon_tbl1:
3341 case Intrinsic::aarch64_neon_tbl2:
3342 case Intrinsic::aarch64_neon_tbl3:
3343 case Intrinsic::aarch64_neon_tbl4:
3344 return simplifyNeonTbl(*II, *this, /*IsExtension=*/false);
3345 case Intrinsic::arm_neon_vtbx1:
3346 case Intrinsic::arm_neon_vtbx2:
3347 case Intrinsic::arm_neon_vtbx3:
3348 case Intrinsic::arm_neon_vtbx4:
3349 case Intrinsic::aarch64_neon_tbx1:
3350 case Intrinsic::aarch64_neon_tbx2:
3351 case Intrinsic::aarch64_neon_tbx3:
3352 case Intrinsic::aarch64_neon_tbx4:
3353 return simplifyNeonTbl(*II, *this, /*IsExtension=*/true);
3354
3355 case Intrinsic::arm_neon_vmulls:
3356 case Intrinsic::arm_neon_vmullu:
3357 case Intrinsic::aarch64_neon_smull:
3358 case Intrinsic::aarch64_neon_umull: {
3359 Value *Arg0 = II->getArgOperand(0);
3360 Value *Arg1 = II->getArgOperand(1);
3361
3362 // Handle mul by zero first:
3363     if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
3364       return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3365 }
3366
3367 // Check for constant LHS & RHS - in this case we just simplify.
3368 bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
3369 IID == Intrinsic::aarch64_neon_umull);
3370 VectorType *NewVT = cast<VectorType>(II->getType());
3371 if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3372 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3373 Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
3374 Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
3375 return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
3376 }
3377
3378 // Couldn't simplify - canonicalize constant to the RHS.
3379 std::swap(Arg0, Arg1);
3380 }
3381
3382 // Handle mul by one:
3383 if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3384 if (ConstantInt *Splat =
3385 dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3386 if (Splat->isOne())
3387 return CastInst::CreateIntegerCast(Arg0, II->getType(),
3388 /*isSigned=*/!Zext);
3389
3390 break;
3391 }
3392 case Intrinsic::arm_neon_aesd:
3393 case Intrinsic::arm_neon_aese:
3394 case Intrinsic::aarch64_crypto_aesd:
3395 case Intrinsic::aarch64_crypto_aese:
3396 case Intrinsic::aarch64_sve_aesd:
3397 case Intrinsic::aarch64_sve_aese: {
3398 Value *DataArg = II->getArgOperand(0);
3399 Value *KeyArg = II->getArgOperand(1);
3400
3401 // Accept zero on either operand.
3402 if (!match(KeyArg, m_ZeroInt()))
3403 std::swap(KeyArg, DataArg);
3404
3405 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
3406 Value *Data, *Key;
3407 if (match(KeyArg, m_ZeroInt()) &&
3408 match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3409 replaceOperand(*II, 0, Data);
3410 replaceOperand(*II, 1, Key);
3411 return II;
3412 }
3413 break;
3414 }
3415 case Intrinsic::arm_neon_vshifts:
3416 case Intrinsic::arm_neon_vshiftu:
3417 case Intrinsic::aarch64_neon_sshl:
3418 case Intrinsic::aarch64_neon_ushl:
3419 return foldNeonShift(II, *this);
3420 case Intrinsic::hexagon_V6_vandvrt:
3421 case Intrinsic::hexagon_V6_vandvrt_128B: {
3422 // Simplify Q -> V -> Q conversion.
3423 if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3424 Intrinsic::ID ID0 = Op0->getIntrinsicID();
3425 if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3426 ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3427 break;
3428 Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
3429 uint64_t Bytes1 = computeKnownBits(Bytes, Op0).One.getZExtValue();
3430 uint64_t Mask1 = computeKnownBits(Mask, II).One.getZExtValue();
3431 // Check if every byte has common bits in Bytes and Mask.
3432 uint64_t C = Bytes1 & Mask1;
3433 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
3434 return replaceInstUsesWith(*II, Op0->getArgOperand(0));
3435 }
3436 break;
3437 }
3438 case Intrinsic::stackrestore: {
3439 enum class ClassifyResult {
3440 None,
3441 Alloca,
3442 StackRestore,
3443 CallWithSideEffects,
3444 };
3445 auto Classify = [](const Instruction *I) {
3446 if (isa<AllocaInst>(I))
3447 return ClassifyResult::Alloca;
3448
3449 if (auto *CI = dyn_cast<CallInst>(I)) {
3450 if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
3451 if (II->getIntrinsicID() == Intrinsic::stackrestore)
3452 return ClassifyResult::StackRestore;
3453
3454 if (II->mayHaveSideEffects())
3455 return ClassifyResult::CallWithSideEffects;
3456 } else {
3457 // Consider all non-intrinsic calls to be side effects
3458 return ClassifyResult::CallWithSideEffects;
3459 }
3460 }
3461
3462 return ClassifyResult::None;
3463 };
3464
3465 // If the stacksave and the stackrestore are in the same BB, and there is
3466 // no intervening call, alloca, or stackrestore of a different stacksave,
3467 // remove the restore. This can happen when variable allocas are DCE'd.
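    // Illustrative case: once a variable-length alloca has been dead-code
    // eliminated, a pairing like
    //   %sp = call ptr @llvm.stacksave()
    //   ... no allocas, calls, or other stackrestores ...
    //   call void @llvm.stackrestore(ptr %sp)
    // no longer serves a purpose, so the stackrestore can be dropped.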
3468 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3469 if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3470 SS->getParent() == II->getParent()) {
3471 BasicBlock::iterator BI(SS);
3472 bool CannotRemove = false;
3473 for (++BI; &*BI != II; ++BI) {
3474 switch (Classify(&*BI)) {
3475 case ClassifyResult::None:
3476 // So far so good, look at next instructions.
3477 break;
3478
3479 case ClassifyResult::StackRestore:
3480 // If we found an intervening stackrestore for a different
3481 // stacksave, we can't remove the stackrestore. Otherwise, continue.
3482 if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
3483 CannotRemove = true;
3484 break;
3485
3486 case ClassifyResult::Alloca:
3487 case ClassifyResult::CallWithSideEffects:
3488 // If we found an alloca, a non-intrinsic call, or an intrinsic
3489 // call with side effects, we can't remove the stackrestore.
3490 CannotRemove = true;
3491 break;
3492 }
3493 if (CannotRemove)
3494 break;
3495 }
3496
3497 if (!CannotRemove)
3498 return eraseInstFromFunction(CI);
3499 }
3500 }
3501
3502 // Scan down this block to see if there is another stack restore in the
3503 // same block without an intervening call/alloca.
3504     BasicBlock::iterator BI(II);
3505     Instruction *TI = II->getParent()->getTerminator();
3506 bool CannotRemove = false;
3507 for (++BI; &*BI != TI; ++BI) {
3508 switch (Classify(&*BI)) {
3509 case ClassifyResult::None:
3510 // So far so good, look at next instructions.
3511 break;
3512
3513 case ClassifyResult::StackRestore:
3514 // If there is a stackrestore below this one, remove this one.
3515 return eraseInstFromFunction(CI);
3516
3517 case ClassifyResult::Alloca:
3518 case ClassifyResult::CallWithSideEffects:
3519 // If we found an alloca, a non-intrinsic call, or an intrinsic call
3520 // with side effects (such as llvm.stacksave and llvm.read_register),
3521 // we can't remove the stack restore.
3522 CannotRemove = true;
3523 break;
3524 }
3525 if (CannotRemove)
3526 break;
3527 }
3528
3529 // If the stack restore is in a return, resume, or unwind block and if there
3530 // are no allocas or calls between the restore and the return, nuke the
3531 // restore.
3532 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3533 return eraseInstFromFunction(CI);
3534 break;
3535 }
3536 case Intrinsic::lifetime_end:
3537 // Asan needs to poison memory to detect invalid access which is possible
3538 // even for empty lifetime range.
3539 if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3540 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
3541 II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress) ||
3542 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag))
3543 break;
3544
3545 if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
3546 return I.getIntrinsicID() == Intrinsic::lifetime_start;
3547 }))
3548 return nullptr;
3549 break;
3550 case Intrinsic::assume: {
3551 Value *IIOperand = II->getArgOperand(0);
3552     SmallVector<OperandBundleDef, 4> OpBundles;
3553     II->getOperandBundlesAsDefs(OpBundles);
3554
3555 /// This will remove the boolean Condition from the assume given as
3556 /// argument and remove the assume if it becomes useless.
3557     /// It always returns nullptr so it can be used directly as a return value.
3558 auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
3559 assert(isa<AssumeInst>(Assume));
3560       if (isAssumeWithEmptyBundle(*cast<AssumeInst>(II)))
3561         return eraseInstFromFunction(CI);
3562 replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
3563 return nullptr;
3564 };
3565 // Remove an assume if it is followed by an identical assume.
3566 // TODO: Do we need this? Unless there are conflicting assumptions, the
3567 // computeKnownBits(IIOperand) below here eliminates redundant assumes.
3568 Instruction *Next = II->getNextNode();
3569     if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
3570       return RemoveConditionFromAssume(Next);
3571
3572 // Canonicalize assume(a && b) -> assume(a); assume(b);
3573 // Note: New assumption intrinsics created here are registered by
3574 // the InstCombineIRInserter object.
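    // E.g. assume(%x != null && %y > 0) becomes two assumes, one per
    // condition, so each fact can later be queried independently.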
3575 FunctionType *AssumeIntrinsicTy = II->getFunctionType();
3576 Value *AssumeIntrinsic = II->getCalledOperand();
3577 Value *A, *B;
3578 if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
3579 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
3580 II->getName());
3581 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
3582 return eraseInstFromFunction(*II);
3583 }
3584 // assume(!(a || b)) -> assume(!a); assume(!b);
3585 if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
3586 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
3587 Builder.CreateNot(A), OpBundles, II->getName());
3588 Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
3589 Builder.CreateNot(B), II->getName());
3590 return eraseInstFromFunction(*II);
3591 }
3592
3593 // assume( (load addr) != null ) -> add 'nonnull' metadata to load
3594 // (if assume is valid at the load)
3595 Instruction *LHS;
3596     if (match(IIOperand, m_SpecificICmp(ICmpInst::ICMP_NE, m_Instruction(LHS),
3597                                         m_Zero())) &&
3598 LHS->getOpcode() == Instruction::Load &&
3599 LHS->getType()->isPointerTy() &&
3600 isValidAssumeForContext(II, LHS, &DT)) {
3601 MDNode *MD = MDNode::get(II->getContext(), {});
3602 LHS->setMetadata(LLVMContext::MD_nonnull, MD);
3603 LHS->setMetadata(LLVMContext::MD_noundef, MD);
3604 return RemoveConditionFromAssume(II);
3605
3606 // TODO: apply nonnull return attributes to calls and invokes
3607 // TODO: apply range metadata for range check patterns?
3608 }
3609
3610 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3611 OperandBundleUse OBU = II->getOperandBundleAt(Idx);
3612
3613 // Separate storage assumptions apply to the underlying allocations, not
3614 // any particular pointer within them. When evaluating the hints for AA
3615 // purposes we getUnderlyingObject them; by precomputing the answers here
3616 // we can avoid having to do so repeatedly there.
3617 if (OBU.getTagName() == "separate_storage") {
3618 assert(OBU.Inputs.size() == 2);
3619 auto MaybeSimplifyHint = [&](const Use &U) {
3620 Value *Hint = U.get();
3621 // Not having a limit is safe because InstCombine removes unreachable
3622 // code.
3623 Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
3624 if (Hint != UnderlyingObject)
3625 replaceUse(const_cast<Use &>(U), UnderlyingObject);
3626 };
3627 MaybeSimplifyHint(OBU.Inputs[0]);
3628 MaybeSimplifyHint(OBU.Inputs[1]);
3629 }
3630
3631 // Try to remove redundant alignment assumptions.
3632 if (OBU.getTagName() == "align" && OBU.Inputs.size() == 2) {
3633         RetainedKnowledge RK = getKnowledgeFromOperandInAssume(
3634             *cast<AssumeInst>(II), II->arg_size() + Idx);
3635 if (!RK || RK.AttrKind != Attribute::Alignment ||
3636             !isPowerOf2_64(RK.ArgValue))
3637           continue;
3638
3639 // Remove align 1 bundles; they don't add any useful information.
3640 if (RK.ArgValue == 1)
3641           return CallBase::removeOperandBundle(II, OBU.getTagID());
3642 
3643 // Don't try to remove align assumptions for pointers derived from
3644         // arguments. We might lose information if the function gets inlined and
3645 // the align argument attribute disappears.
3646         Value *UO = getUnderlyingObject(RK.WasOn);
3647         if (!UO || isa<Argument>(UO))
3648 continue;
3649
3650 // Compute known bits for the pointer, passing nullptr as context to
3651 // avoid computeKnownBits using the assumption we are about to remove
3652 // for reasoning.
3653 KnownBits Known = computeKnownBits(RK.WasOn, /*CtxI=*/nullptr);
3654 unsigned TZ = std::min(Known.countMinTrailingZeros(),
3655                                Value::MaxAlignmentExponent);
3656         if ((1ULL << TZ) < RK.ArgValue)
3657 continue;
3658         return CallBase::removeOperandBundle(II, OBU.getTagID());
3659       }
3660 }
3661
3662 // Convert nonnull assume like:
3663 // %A = icmp ne i32* %PTR, null
3664 // call void @llvm.assume(i1 %A)
3665 // into
3666 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3667     if (EnableKnowledgeRetention &&
3668         match(IIOperand,
3669               m_SpecificICmp(ICmpInst::ICMP_NE, m_Value(A), m_Zero())) &&
3670         A->getType()->isPointerTy()) {
3671 if (auto *Replacement = buildAssumeFromKnowledge(
3672 {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
3673
3674 Replacement->insertBefore(Next->getIterator());
3675 AC.registerAssumption(Replacement);
3676 return RemoveConditionFromAssume(II);
3677 }
3678 }
3679
3680 // Convert alignment assume like:
3681 // %B = ptrtoint i32* %A to i64
3682 // %C = and i64 %B, Constant
3683 // %D = icmp eq i64 %C, 0
3684 // call void @llvm.assume(i1 %D)
3685 // into
3686 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
3687 uint64_t AlignMask = 1;
3688 if ((match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
3689 match(IIOperand,
3690                m_SpecificICmp(ICmpInst::ICMP_EQ,
3691                               m_And(m_Value(A), m_ConstantInt(AlignMask)),
3692 m_Zero())))) {
3693 if (isPowerOf2_64(AlignMask + 1)) {
3694 uint64_t Offset = 0;
3695         match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
3696         if (match(A, m_PtrToIntOrAddr(m_Value(A)))) {
3697 /// Note: this doesn't preserve the offset information but merges
3698 /// offset and alignment.
3699 /// TODO: we can generate a GEP instead of merging the alignment with
3700 /// the offset.
3701 RetainedKnowledge RK{Attribute::Alignment,
3702 (unsigned)MinAlign(Offset, AlignMask + 1), A};
3703 if (auto *Replacement =
3704                   buildAssumeFromKnowledge(RK, II, &AC, &DT)) {
3705 
3706 Replacement->insertAfter(II->getIterator());
3707 AC.registerAssumption(Replacement);
3708 }
3709 return RemoveConditionFromAssume(II);
3710 }
3711 }
3712 }
3713
3714 /// Canonicalize Knowledge in operand bundles.
3715 if (EnableKnowledgeRetention && II->hasOperandBundles()) {
3716 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3717 auto &BOI = II->bundle_op_info_begin()[Idx];
3718       RetainedKnowledge RK =
3719           llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI);
3720       if (BOI.End - BOI.Begin > 2)
3721 continue; // Prevent reducing knowledge in an align with offset since
3722                   // extracting a RetainedKnowledge from them loses offset
3723 // information
3724 RetainedKnowledge CanonRK =
3725           llvm::simplifyRetainedKnowledge(cast<AssumeInst>(II), RK,
3726                                           &getAssumptionCache(),
3727                                           &getDominatorTree());
3728 if (CanonRK == RK)
3729 continue;
3730 if (!CanonRK) {
3731 if (BOI.End - BOI.Begin > 0) {
3732 Worklist.pushValue(II->op_begin()[BOI.Begin]);
3733 Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
3734 }
3735 continue;
3736 }
3737 assert(RK.AttrKind == CanonRK.AttrKind);
3738 if (BOI.End - BOI.Begin > 0)
3739 II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
3740 if (BOI.End - BOI.Begin > 1)
3741 II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
3742 Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
3743 if (RK.WasOn)
3744 Worklist.pushValue(RK.WasOn);
3745 return II;
3746 }
3747 }
3748
3749 // If there is a dominating assume with the same condition as this one,
3750 // then this one is redundant, and should be removed.
3751 KnownBits Known(1);
3752 computeKnownBits(IIOperand, Known, II);
3753     if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast<AssumeInst>(*II)))
3754       return eraseInstFromFunction(*II);
3755
3756 // assume(false) is unreachable.
3757 if (match(IIOperand, m_CombineOr(m_Zero(), m_Undef()))) {
3758       CreateNonTerminatorUnreachable(II);
3759       return eraseInstFromFunction(*II);
3760 }
3761
3762 // Update the cache of affected values for this assumption (we might be
3763 // here because we just simplified the condition).
3764 AC.updateAffectedValues(cast<AssumeInst>(II));
3765 break;
3766 }
3767 case Intrinsic::experimental_guard: {
3768 // Is this guard followed by another guard? We scan forward over a small
3769 // fixed window of instructions to handle common cases with conditions
3770 // computed between guards.
3771 Instruction *NextInst = II->getNextNode();
3772 for (unsigned i = 0; i < GuardWideningWindow; i++) {
3773 // Note: Using context-free form to avoid compile time blow up
3774 if (!isSafeToSpeculativelyExecute(NextInst))
3775 break;
3776 NextInst = NextInst->getNextNode();
3777 }
3778 Value *NextCond = nullptr;
3779 if (match(NextInst,
3780               m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
3781       Value *CurrCond = II->getArgOperand(0);
3782
3783       // Remove a guard that is immediately preceded by an identical guard.
3784 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
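      // E.g. guard(%a) followed shortly by guard(%b) becomes guard(%a & %b),
      // after any speculatable instructions that compute %b between the two
      // guards are hoisted above the merged guard.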
3785 if (CurrCond != NextCond) {
3786 Instruction *MoveI = II->getNextNode();
3787 while (MoveI != NextInst) {
3788 auto *Temp = MoveI;
3789 MoveI = MoveI->getNextNode();
3790 Temp->moveBefore(II->getIterator());
3791 }
3792 replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
3793 }
3794 eraseInstFromFunction(*NextInst);
3795 return II;
3796 }
3797 break;
3798 }
3799 case Intrinsic::vector_insert: {
3800 Value *Vec = II->getArgOperand(0);
3801 Value *SubVec = II->getArgOperand(1);
3802 Value *Idx = II->getArgOperand(2);
3803 auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
3804 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
3805 auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
3806
3807 // Only canonicalize if the destination vector, Vec, and SubVec are all
3808 // fixed vectors.
3809 if (DstTy && VecTy && SubVecTy) {
3810 unsigned DstNumElts = DstTy->getNumElements();
3811 unsigned VecNumElts = VecTy->getNumElements();
3812 unsigned SubVecNumElts = SubVecTy->getNumElements();
3813 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3814
3815 // An insert that entirely overwrites Vec with SubVec is a nop.
3816 if (VecNumElts == SubVecNumElts)
3817 return replaceInstUsesWith(CI, SubVec);
3818
3819 // Widen SubVec into a vector of the same width as Vec, since
3820 // shufflevector requires the two input vectors to be the same width.
3821 // Elements beyond the bounds of SubVec within the widened vector are
3822 // undefined.
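      // E.g. inserting a <2 x i32> into a <4 x i32> at index 2 first widens
      // the subvector with mask <0, 1, poison, poison>, then blends it with
      // Vec using shuffle mask <0, 1, 4, 5> (illustrative values).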
3823 SmallVector<int, 8> WidenMask;
3824 unsigned i;
3825 for (i = 0; i != SubVecNumElts; ++i)
3826 WidenMask.push_back(i);
3827 for (; i != VecNumElts; ++i)
3828 WidenMask.push_back(PoisonMaskElem);
3829
3830 Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
3831
3832       SmallVector<int, 8> Mask;
3833       for (unsigned i = 0; i != IdxN; ++i)
3834 Mask.push_back(i);
3835 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3836 Mask.push_back(i);
3837 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3838 Mask.push_back(i);
3839
3840 Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
3841 return replaceInstUsesWith(CI, Shuffle);
3842 }
3843 break;
3844 }
3845 case Intrinsic::vector_extract: {
3846 Value *Vec = II->getArgOperand(0);
3847 Value *Idx = II->getArgOperand(1);
3848
3849 Type *ReturnType = II->getType();
3850 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3851 // ExtractIdx)
3852 unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
3853 Value *InsertTuple, *InsertIdx, *InsertValue;
3854     if (match(Vec, m_Intrinsic<Intrinsic::vector_insert>(m_Value(InsertTuple),
3855                                                          m_Value(InsertValue),
3856 m_Value(InsertIdx))) &&
3857 InsertValue->getType() == ReturnType) {
3858 unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
3859 // Case where we get the same index right after setting it.
3860 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3861 // InsertValue
3862 if (ExtractIdx == Index)
3863 return replaceInstUsesWith(CI, InsertValue);
3864 // If we are getting a different index than what was set in the
3865 // insert.vector intrinsic. We can just set the input tuple to the one up
3866 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3867 // InsertIndex), ExtractIndex)
3868 // --> extract.vector(InsertTuple, ExtractIndex)
3869 else
3870 return replaceOperand(CI, 0, InsertTuple);
3871 }
3872
3873 auto *DstTy = dyn_cast<VectorType>(ReturnType);
3874 auto *VecTy = dyn_cast<VectorType>(Vec->getType());
3875
3876 if (DstTy && VecTy) {
3877 auto DstEltCnt = DstTy->getElementCount();
3878 auto VecEltCnt = VecTy->getElementCount();
3879 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3880
3881 // Extracting the entirety of Vec is a nop.
3882 if (DstEltCnt == VecTy->getElementCount()) {
3883 replaceInstUsesWith(CI, Vec);
3884 return eraseInstFromFunction(CI);
3885 }
3886
3887 // Only canonicalize to shufflevector if the destination vector and
3888 // Vec are fixed vectors.
3889 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
3890 break;
3891
3892       SmallVector<int, 8> Mask;
3893       for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
3894 Mask.push_back(IdxN + i);
3895
3896 Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
3897 return replaceInstUsesWith(CI, Shuffle);
3898 }
3899 break;
3900 }
3901 case Intrinsic::experimental_vp_reverse: {
3902 Value *X;
3903 Value *Vec = II->getArgOperand(0);
3904 Value *Mask = II->getArgOperand(1);
3905 if (!match(Mask, m_AllOnes()))
3906 break;
3907 Value *EVL = II->getArgOperand(2);
3908 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
3909 // rev(unop rev(X)) --> unop X
3910 if (match(Vec,
3911               m_OneUse(m_UnOp(m_Intrinsic<Intrinsic::experimental_vp_reverse>(
3912                   m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
3913 auto *OldUnOp = cast<UnaryOperator>(Vec);
3914       auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
3915           OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
3916 II->getIterator());
3917 return replaceInstUsesWith(CI, NewUnOp);
3918 }
3919 break;
3920 }
3921 case Intrinsic::vector_reduce_or:
3922 case Intrinsic::vector_reduce_and: {
3923 // Canonicalize logical or/and reductions:
3924 // Or reduction for i1 is represented as:
3925 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3926     // %res = icmp ne iReduxWidth %val, 0
3927     // And reduction for i1 is represented as:
3928     // %val = bitcast <ReduxWidth x i1> to iReduxWidth
3929     // %res = icmp eq iReduxWidth %val, -1 (all bits set)
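    // Concretely, for a <4 x i1> %v this pass emits roughly:
    //   %val = bitcast <4 x i1> %v to i4
    //   %res = icmp eq i4 %val, -1      ; and-reduction
    //   %res = icmp ne i4 %val, 0       ; or-reduction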
3930 Value *Arg = II->getArgOperand(0);
3931 Value *Vect;
3932
3933 if (Value *NewOp =
3934 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3935 replaceUse(II->getOperandUse(0), NewOp);
3936 return II;
3937 }
3938
3939 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3940 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
3941 if (FTy->getElementType() == Builder.getInt1Ty()) {
3942 Value *Res = Builder.CreateBitCast(
3943 Vect, Builder.getIntNTy(FTy->getNumElements()));
3944 if (IID == Intrinsic::vector_reduce_and) {
3945 Res = Builder.CreateICmpEQ(
3946                 Res, ConstantInt::getAllOnesValue(Res->getType()));
3947           } else {
3948 assert(IID == Intrinsic::vector_reduce_or &&
3949 "Expected or reduction.");
3950 Res = Builder.CreateIsNotNull(Res);
3951 }
3952 if (Arg != Vect)
3953 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
3954 II->getType());
3955 return replaceInstUsesWith(CI, Res);
3956 }
3957 }
3958 [[fallthrough]];
3959 }
3960 case Intrinsic::vector_reduce_add: {
3961 if (IID == Intrinsic::vector_reduce_add) {
3962 // Convert vector_reduce_add(ZExt(<n x i1>)) to
3963 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3964 // Convert vector_reduce_add(SExt(<n x i1>)) to
3965 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3966 // Convert vector_reduce_add(<n x i1>) to
3967 // Trunc(ctpop(bitcast <n x i1> to in)).
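      // E.g. vector_reduce_add(zext <8 x i1> %v to <8 x i32>) becomes roughly:
      //   %b = bitcast <8 x i1> %v to i8
      //   %c = call i8 @llvm.ctpop.i8(i8 %b)
      //   %r = zext i8 %c to i32
      // and the sext form is the same with the final result negated.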
3968 Value *Arg = II->getArgOperand(0);
3969 Value *Vect;
3970
3971 if (Value *NewOp =
3972 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
3973 replaceUse(II->getOperandUse(0), NewOp);
3974 return II;
3975 }
3976
3977 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
3978 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
3979 if (FTy->getElementType() == Builder.getInt1Ty()) {
3980 Value *V = Builder.CreateBitCast(
3981 Vect, Builder.getIntNTy(FTy->getNumElements()));
3982 Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
3983 Res = Builder.CreateZExtOrTrunc(Res, II->getType());
3984 if (Arg != Vect &&
3985 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
3986 Res = Builder.CreateNeg(Res);
3987 return replaceInstUsesWith(CI, Res);
3988 }
3989 }
3990
3991 // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
3992 if (Value *Splat = getSplatValue(Arg)) {
3993 ElementCount VecToReduceCount =
3994 cast<VectorType>(Arg->getType())->getElementCount();
3995 if (VecToReduceCount.isFixed()) {
3996 unsigned VectorSize = VecToReduceCount.getFixedValue();
3997 return BinaryOperator::CreateMul(
3998 Splat,
3999 ConstantInt::get(Splat->getType(), VectorSize, /*IsSigned=*/false,
4000 /*ImplicitTrunc=*/true));
4001 }
4002 }
4003 }
4004 [[fallthrough]];
4005 }
4006 case Intrinsic::vector_reduce_xor: {
4007 if (IID == Intrinsic::vector_reduce_xor) {
4008 // Exclusive disjunction reduction over the vector with
4009 // (potentially-extended) i1 element type is actually a
4010 // (potentially-extended) arithmetic `add` reduction over the original
4011 // non-extended value:
4012 // vector_reduce_xor(?ext(<n x i1>))
4013 // -->
4014 // ?ext(vector_reduce_add(<n x i1>))
4015 Value *Arg = II->getArgOperand(0);
4016 Value *Vect;
4017
4018 if (Value *NewOp =
4019 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4020 replaceUse(II->getOperandUse(0), NewOp);
4021 return II;
4022 }
4023
4024 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4025 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4026 if (VTy->getElementType() == Builder.getInt1Ty()) {
4027 Value *Res = Builder.CreateAddReduce(Vect);
4028 if (Arg != Vect)
4029 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4030 II->getType());
4031 return replaceInstUsesWith(CI, Res);
4032 }
4033 }
4034 }
4035 [[fallthrough]];
4036 }
4037 case Intrinsic::vector_reduce_mul: {
4038 if (IID == Intrinsic::vector_reduce_mul) {
4039 // Multiplicative reduction over the vector with (potentially-extended)
4040 // i1 element type is actually a (potentially zero-extended)
4041 // logical `and` reduction over the original non-extended value:
4042 // vector_reduce_mul(?ext(<n x i1>))
4043 // -->
4044 // zext(vector_reduce_and(<n x i1>))
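      // E.g. vector_reduce_mul(<4 x i1> %v) is 1 only if every lane is 1,
      // which is exactly an and-reduction (zero-extended to the result type).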
4045 Value *Arg = II->getArgOperand(0);
4046 Value *Vect;
4047
4048 if (Value *NewOp =
4049 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4050 replaceUse(II->getOperandUse(0), NewOp);
4051 return II;
4052 }
4053
4054 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4055 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4056 if (VTy->getElementType() == Builder.getInt1Ty()) {
4057 Value *Res = Builder.CreateAndReduce(Vect);
4058 Res = Builder.CreateZExt(Res, II->getType());
4059 return replaceInstUsesWith(CI, Res);
4060 }
4061 }
4062 }
4063 [[fallthrough]];
4064 }
4065 case Intrinsic::vector_reduce_umin:
4066 case Intrinsic::vector_reduce_umax: {
4067 if (IID == Intrinsic::vector_reduce_umin ||
4068 IID == Intrinsic::vector_reduce_umax) {
4069 // UMin/UMax reduction over the vector with (potentially-extended)
4070 // i1 element type is actually a (potentially-extended)
4071 // logical `and`/`or` reduction over the original non-extended value:
4072 // vector_reduce_u{min,max}(?ext(<n x i1>))
4073 // -->
4074 // ?ext(vector_reduce_{and,or}(<n x i1>))
4075 Value *Arg = II->getArgOperand(0);
4076 Value *Vect;
4077
4078 if (Value *NewOp =
4079 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4080 replaceUse(II->getOperandUse(0), NewOp);
4081 return II;
4082 }
4083
4084 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4085 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4086 if (VTy->getElementType() == Builder.getInt1Ty()) {
4087 Value *Res = IID == Intrinsic::vector_reduce_umin
4088 ? Builder.CreateAndReduce(Vect)
4089 : Builder.CreateOrReduce(Vect);
4090 if (Arg != Vect)
4091 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4092 II->getType());
4093 return replaceInstUsesWith(CI, Res);
4094 }
4095 }
4096 }
4097 [[fallthrough]];
4098 }
4099 case Intrinsic::vector_reduce_smin:
4100 case Intrinsic::vector_reduce_smax: {
4101 if (IID == Intrinsic::vector_reduce_smin ||
4102 IID == Intrinsic::vector_reduce_smax) {
4103 // SMin/SMax reduction over the vector with (potentially-extended)
4104 // i1 element type is actually a (potentially-extended)
4105 // logical `and`/`or` reduction over the original non-extended value:
4106 // vector_reduce_s{min,max}(<n x i1>)
4107 // -->
4108 // vector_reduce_{or,and}(<n x i1>)
4109 // and
4110 // vector_reduce_s{min,max}(sext(<n x i1>))
4111 // -->
4112 // sext(vector_reduce_{or,and}(<n x i1>))
4113 // and
4114 // vector_reduce_s{min,max}(zext(<n x i1>))
4115 // -->
4116 // zext(vector_reduce_{and,or}(<n x i1>))
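      // The sign flips the roles: with sext'd (0/-1) lanes, smin is -1 if any
      // lane is true, i.e. an or-reduction, while with zext'd (0/1) lanes,
      // smin is 1 only if all lanes are true, i.e. an and-reduction; smax is
      // the mirror image.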
4117 Value *Arg = II->getArgOperand(0);
4118 Value *Vect;
4119
4120 if (Value *NewOp =
4121 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4122 replaceUse(II->getOperandUse(0), NewOp);
4123 return II;
4124 }
4125
4126 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4127 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4128 if (VTy->getElementType() == Builder.getInt1Ty()) {
4129 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
4130 if (Arg != Vect)
4131 ExtOpc = cast<CastInst>(Arg)->getOpcode();
4132 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
4133 (ExtOpc == Instruction::CastOps::ZExt))
4134 ? Builder.CreateAndReduce(Vect)
4135 : Builder.CreateOrReduce(Vect);
4136 if (Arg != Vect)
4137 Res = Builder.CreateCast(ExtOpc, Res, II->getType());
4138 return replaceInstUsesWith(CI, Res);
4139 }
4140 }
4141 }
4142 [[fallthrough]];
4143 }
4144 case Intrinsic::vector_reduce_fmax:
4145 case Intrinsic::vector_reduce_fmin:
4146 case Intrinsic::vector_reduce_fadd:
4147 case Intrinsic::vector_reduce_fmul: {
4148 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
4149 IID != Intrinsic::vector_reduce_fmul) ||
4150 II->hasAllowReassoc();
4151 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
4152 IID == Intrinsic::vector_reduce_fmul)
4153 ? 1
4154 : 0;
4155 Value *Arg = II->getArgOperand(ArgIdx);
4156 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
4157 replaceUse(II->getOperandUse(ArgIdx), NewOp);
4158 return nullptr;
4159 }
4160 break;
4161 }
4162 case Intrinsic::is_fpclass: {
4163 if (Instruction *I = foldIntrinsicIsFPClass(*II))
4164 return I;
4165 break;
4166 }
4167 case Intrinsic::threadlocal_address: {
4168 Align MinAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
4169 MaybeAlign Align = II->getRetAlign();
4170 if (MinAlign > Align.valueOrOne()) {
4171 II->addRetAttr(Attribute::getWithAlignment(II->getContext(), MinAlign));
4172 return II;
4173 }
4174 break;
4175 }
4176 case Intrinsic::frexp: {
4177 Value *X;
4178 // The first result is idempotent with the added complication of the struct
4179 // return, and the second result is zero because the value is already
4180 // normalized.
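    // E.g. given %fr = call { double, i32 } @llvm.frexp.f64.i32(double %x),
    // frexp of extractvalue(%fr, 0) is { extractvalue(%fr, 0), 0 }: the
    // mantissa is already normalized, so splitting it again is a no-op.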
4181 if (match(II->getArgOperand(0), m_ExtractValue<0>(m_Value(X)))) {
4182       if (match(X, m_Intrinsic<Intrinsic::frexp>(m_Value()))) {
4183         X = Builder.CreateInsertValue(
4184 X, Constant::getNullValue(II->getType()->getStructElementType(1)),
4185 1);
4186 return replaceInstUsesWith(*II, X);
4187 }
4188 }
4189 break;
4190 }
4191 case Intrinsic::get_active_lane_mask: {
4192 const APInt *Op0, *Op1;
4193 if (match(II->getOperand(0), m_StrictlyPositive(Op0)) &&
4194 match(II->getOperand(1), m_APInt(Op1))) {
4195 Type *OpTy = II->getOperand(0)->getType();
4196 return replaceInstUsesWith(
4197 *II, Builder.CreateIntrinsic(
4198 II->getType(), Intrinsic::get_active_lane_mask,
4199 {Constant::getNullValue(OpTy),
4200 ConstantInt::get(OpTy, Op1->usub_sat(*Op0))}));
4201 }
4202 break;
4203 }
4204 case Intrinsic::experimental_get_vector_length: {
4205 // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
4206 unsigned BitWidth =
4207 std::max(II->getArgOperand(0)->getType()->getScalarSizeInBits(),
4208 II->getType()->getScalarSizeInBits());
4209 ConstantRange Cnt =
4210 computeConstantRangeIncludingKnownBits(II->getArgOperand(0), false,
4211 SQ.getWithInstruction(II))
4212             .zextOrTrunc(BitWidth);
4213     ConstantRange MaxLanes = cast<ConstantInt>(II->getArgOperand(1))
4214 ->getValue()
4215 .zextOrTrunc(Cnt.getBitWidth());
4216 if (cast<ConstantInt>(II->getArgOperand(2))->isOne())
4217 MaxLanes = MaxLanes.multiply(
4218 getVScaleRange(II->getFunction(), Cnt.getBitWidth()));
4219
4220 if (Cnt.icmp(CmpInst::ICMP_ULE, MaxLanes))
4221 return replaceInstUsesWith(
4222 *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
4223 return nullptr;
4224 }
4225 default: {
4226 // Handle target specific intrinsics
4227 std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
4228 if (V)
4229 return *V;
4230 break;
4231 }
4232 }
4233
4234 // Try to fold intrinsic into select/phi operands. This is legal if:
4235 // * The intrinsic is speculatable.
4236 // * The operand is one of the following:
4237 // - a phi.
4238 // - a select with a scalar condition.
4239 // - a select with a vector condition and II is not a cross lane operation.
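  // E.g. a speculatable intrinsic such as umax(select(%c, 16, %x), 8) can be
  // folded into the select arms, yielding select(%c, 16, umax(%x, 8)).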
4240   if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI)) {
4241     for (Value *Op : II->args()) {
4242 if (auto *Sel = dyn_cast<SelectInst>(Op)) {
4243 bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4244 if (IsVectorCond &&
4245 (!isNotCrossLaneOperation(II) || !II->getType()->isVectorTy()))
4246 continue;
4247 // Don't replace a scalar select with a more expensive vector select if
4248 // we can't simplify both arms of the select.
4249 bool SimplifyBothArms =
4250 !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4252 *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
4253 return R;
4254 }
4255 if (auto *Phi = dyn_cast<PHINode>(Op))
4256 if (Instruction *R = foldOpIntoPhi(*II, Phi))
4257 return R;
4258 }
4259 }
4260
4262 return Shuf;
4263
4265 return replaceInstUsesWith(*II, Reverse);
4266
4268 return replaceInstUsesWith(*II, Res);
4269
4270 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4271 // context, so it is handled in visitCallBase and we should trigger it.
4272 return visitCallBase(*II);
4273}
4274
4275// Fence instruction simplification
4276 Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
4277   auto *NFI = dyn_cast<FenceInst>(FI.getNextNode());
4278 // This check is solely here to handle arbitrary target-dependent syncscopes.
4279 // TODO: Can remove if does not matter in practice.
4280 if (NFI && FI.isIdenticalTo(NFI))
4281 return eraseInstFromFunction(FI);
4282
4283 // Returns true if FI1 is identical or stronger fence than FI2.
4284 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4285 auto FI1SyncScope = FI1->getSyncScopeID();
4286 // Consider same scope, where scope is global or single-thread.
4287 if (FI1SyncScope != FI2->getSyncScopeID() ||
4288 (FI1SyncScope != SyncScope::System &&
4289 FI1SyncScope != SyncScope::SingleThread))
4290 return false;
4291
4292 return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
4293 };
4294 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4295 return eraseInstFromFunction(FI);
4296
4297 if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNode()))
4298 if (isIdenticalOrStrongerFence(PFI, &FI))
4299 return eraseInstFromFunction(FI);
4300 return nullptr;
4301}
4302
4303// InvokeInst simplification
4304 Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
4305   return visitCallBase(II);
4306}
4307
4308// CallBrInst simplification
4309 Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
4310   return visitCallBase(CBI);
4311}
4312
4313 static Value *optimizeModularFormat(CallInst *CI, IRBuilderBase &B) {
4314   if (!CI->hasFnAttr("modular-format"))
4315 return nullptr;
4316
4317   SmallVector<StringRef> Args(
4318       llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
4319 // TODO: Make use of the first two arguments
4320 unsigned FirstArgIdx;
4321 [[maybe_unused]] bool Error;
4322 Error = Args[2].getAsInteger(10, FirstArgIdx);
4323 assert(!Error && "invalid first arg index");
4324 --FirstArgIdx;
4325 StringRef FnName = Args[3];
4326 StringRef ImplName = Args[4];
4328
4329 if (AllAspects.empty())
4330 return nullptr;
4331
4332 SmallVector<StringRef> NeededAspects;
4333 for (StringRef Aspect : AllAspects) {
4334 if (Aspect == "float") {
4335 if (llvm::any_of(
4336 llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
4337 CI->arg_end()),
4338 [](Value *V) { return V->getType()->isFloatingPointTy(); }))
4339 NeededAspects.push_back("float");
4340 } else {
4341 // Unknown aspects are always considered to be needed.
4342 NeededAspects.push_back(Aspect);
4343 }
4344 }
4345
4346 if (NeededAspects.size() == AllAspects.size())
4347 return nullptr;
4348
4349 Module *M = CI->getModule();
4350 LLVMContext &Ctx = M->getContext();
4351 Function *Callee = CI->getCalledFunction();
4352 FunctionCallee ModularFn = M->getOrInsertFunction(
4353 FnName, Callee->getFunctionType(),
4354 Callee->getAttributes().removeFnAttribute(Ctx, "modular-format"));
4355 CallInst *New = cast<CallInst>(CI->clone());
4356 New->setCalledFunction(ModularFn);
4357 New->removeFnAttr("modular-format");
4358 B.Insert(New);
4359
4360 const auto ReferenceAspect = [&](StringRef Aspect) {
4361 SmallString<20> Name = ImplName;
4362 Name += '_';
4363 Name += Aspect;
4364 Function *RelocNoneFn =
4365 Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
4366 B.CreateCall(RelocNoneFn,
4367 {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))});
4368 };
4369
4370 llvm::sort(NeededAspects);
4371 for (StringRef Request : NeededAspects)
4372 ReferenceAspect(Request);
4373
4374 return New;
4375}
4376
4377Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
4378 if (!CI->getCalledFunction()) return nullptr;
4379
4380 // Skip optimizing notail and musttail calls so
4381 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4382 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4383 if (CI->isMustTailCall() || CI->isNoTailCall())
4384 return nullptr;
4385
4386 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4387 replaceInstUsesWith(*From, With);
4388 };
4389 auto InstCombineErase = [this](Instruction *I) {
4390     eraseInstFromFunction(*I);
4391   };
4392 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4393 InstCombineRAUW, InstCombineErase);
4394 if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
4395 ++NumSimplified;
4396 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4397 }
4398 if (Value *With = optimizeModularFormat(CI, Builder)) {
4399 ++NumSimplified;
4400 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4401 }
4402
4403 return nullptr;
4404}
4405
4406 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
4407   // Strip off at most one level of pointer casts, looking for an alloca. This
4408 // is good enough in practice and simpler than handling any number of casts.
4409 Value *Underlying = TrampMem->stripPointerCasts();
4410 if (Underlying != TrampMem &&
4411 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4412 return nullptr;
4413 if (!isa<AllocaInst>(Underlying))
4414 return nullptr;
4415
4416 IntrinsicInst *InitTrampoline = nullptr;
4417 for (User *U : TrampMem->users()) {
4418     IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
4419     if (!II)
4420 return nullptr;
4421 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4422 if (InitTrampoline)
4423 // More than one init_trampoline writes to this value. Give up.
4424 return nullptr;
4425 InitTrampoline = II;
4426 continue;
4427 }
4428 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4429 // Allow any number of calls to adjust.trampoline.
4430 continue;
4431 return nullptr;
4432 }
4433
4434 // No call to init.trampoline found.
4435 if (!InitTrampoline)
4436 return nullptr;
4437
4438 // Check that the alloca is being used in the expected way.
4439 if (InitTrampoline->getOperand(0) != TrampMem)
4440 return nullptr;
4441
4442 return InitTrampoline;
4443}
4444
4445 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
4446                                                Value *TrampMem) {
4447 // Visit all the previous instructions in the basic block, and try to find a
4448 // init.trampoline which has a direct path to the adjust.trampoline.
4449 for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4450 E = AdjustTramp->getParent()->begin();
4451 I != E;) {
4452 Instruction *Inst = &*--I;
4453     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
4454       if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4455 II->getOperand(0) == TrampMem)
4456 return II;
4457 if (Inst->mayWriteToMemory())
4458 return nullptr;
4459 }
4460 return nullptr;
4461}
4462
4463// Given a call to llvm.adjust.trampoline, find and return the corresponding
4464// call to llvm.init.trampoline if the call to the trampoline can be optimized
4465// to a direct call to a function. Otherwise return NULL.
4466 static IntrinsicInst *findInitTrampoline(Value *Callee) {
4467   Callee = Callee->stripPointerCasts();
4468 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4469 if (!AdjustTramp ||
4470 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4471 return nullptr;
4472
4473 Value *TrampMem = AdjustTramp->getOperand(0);
4474
4475   if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
4476     return IT;
4477 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4478 return IT;
4479 return nullptr;
4480}
4481
4482Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4483 const Value *Callee = Call.getCalledOperand();
4484 const auto *IPC = dyn_cast<IntToPtrInst>(Callee);
4485 if (!IPC || !IPC->isNoopCast(DL))
4486 return nullptr;
4487
4488 const auto *II = dyn_cast<IntrinsicInst>(IPC->getOperand(0));
4489 if (!II)
4490 return nullptr;
4491
4492 Intrinsic::ID IIID = II->getIntrinsicID();
4493 if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4494 return nullptr;
4495
4496 // Isolate the ptrauth bundle from the others.
4497 std::optional<OperandBundleUse> PtrAuthBundleOrNone;
4498   SmallVector<OperandBundleDef, 2> NewBundles;
4499   for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4500 OperandBundleUse Bundle = Call.getOperandBundleAt(BI);
4501 if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4502 PtrAuthBundleOrNone = Bundle;
4503 else
4504 NewBundles.emplace_back(Bundle);
4505 }
4506
4507 if (!PtrAuthBundleOrNone)
4508 return nullptr;
4509
4510 Value *NewCallee = nullptr;
4511 switch (IIID) {
4512 // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4513 // assuming the call bundle and the sign operands match.
4514 case Intrinsic::ptrauth_resign: {
4515 // Resign result key should match bundle.
4516 if (II->getOperand(3) != PtrAuthBundleOrNone->Inputs[0])
4517 return nullptr;
4518 // Resign result discriminator should match bundle.
4519 if (II->getOperand(4) != PtrAuthBundleOrNone->Inputs[1])
4520 return nullptr;
4521
4522 // Resign input (auth) key should also match: we can't change the key on
4523 // the new call we're generating, because we don't know what keys are valid.
4524 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4525 return nullptr;
4526
4527 Value *NewBundleOps[] = {II->getOperand(1), II->getOperand(2)};
4528 NewBundles.emplace_back("ptrauth", NewBundleOps);
4529 NewCallee = II->getOperand(0);
4530 break;
4531 }
4532
4533 // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4534 // assuming the call bundle and the sign operands match.
4535 // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4536 case Intrinsic::ptrauth_sign: {
4537 // Sign key should match bundle.
4538 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4539 return nullptr;
4540 // Sign discriminator should match bundle.
4541 if (II->getOperand(2) != PtrAuthBundleOrNone->Inputs[1])
4542 return nullptr;
4543 NewCallee = II->getOperand(0);
4544 break;
4545 }
4546 default:
4547 llvm_unreachable("unexpected intrinsic ID");
4548 }
4549
4550 if (!NewCallee)
4551 return nullptr;
4552
4553 NewCallee = Builder.CreateBitOrPointerCast(NewCallee, Callee->getType());
4554 CallBase *NewCall = CallBase::Create(&Call, NewBundles);
4555 NewCall->setCalledOperand(NewCallee);
4556 return NewCall;
4557}
4558
4559Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
4560   auto *CPA = dyn_cast<ConstantPtrAuth>(Call.getCalledOperand());
4561   if (!CPA)
4562 return nullptr;
4563
4564 auto *CalleeF = dyn_cast<Function>(CPA->getPointer());
4565 // If the ptrauth constant isn't based on a function pointer, bail out.
4566 if (!CalleeF)
4567 return nullptr;
4568
4569 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
4570   auto PAB = Call.getOperandBundle(LLVMContext::OB_ptrauth);
4571   if (!PAB)
4572 return nullptr;
4573
4574 auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
4575 Value *Discriminator = PAB->Inputs[1];
4576
4577 // If the bundle doesn't match, this is probably going to fail to auth.
4578 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4579 return nullptr;
4580
4581   // If the bundle matches the constant, proceed with making this a direct call.
4582   auto *NewCall = CallBase::removeOperandBundle(&Call, LLVMContext::OB_ptrauth);
4583   NewCall->setCalledOperand(CalleeF);
4584 return NewCall;
4585}
4586
4587bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4588 const TargetLibraryInfo *TLI) {
4589 // Note: We only handle cases which can't be driven from generic attributes
4590 // here. So, for example, nonnull and noalias (which are common properties
4591 // of some allocation functions) are expected to be handled via annotation
4592 // of the respective allocator declaration with generic attributes.
4593 bool Changed = false;
4594
4595 if (!Call.getType()->isPointerTy())
4596 return Changed;
4597
4598 std::optional<APInt> Size = getAllocSize(&Call, TLI);
4599 if (Size && *Size != 0) {
4600 // TODO: We really should just emit deref_or_null here and then
4601 // let the generic inference code combine that with nonnull.
4602 if (Call.hasRetAttr(Attribute::NonNull)) {
4603 Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
4604       Call.addRetAttr(Attribute::getWithDereferenceableBytes(
4605           Call.getContext(), Size->getLimitedValue()));
4606 } else {
4607 Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
4608       Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
4609           Call.getContext(), Size->getLimitedValue()));
4610 }
4611 }
4612
4613 // Add alignment attribute if alignment is a power of two constant.
4614 Value *Alignment = getAllocAlignment(&Call, TLI);
4615 if (!Alignment)
4616 return Changed;
4617
4618 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
4619 if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
4620 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4621 if (llvm::isPowerOf2_64(AlignmentVal)) {
4622 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4623 Align NewAlign = Align(AlignmentVal);
4624 if (NewAlign > ExistingAlign) {
4625         Call.addRetAttr(
4626             Attribute::getWithAlignment(Call.getContext(), NewAlign));
4627         Changed = true;
4628 }
4629 }
4630 }
4631 return Changed;
4632}
4633
4634/// Improvements for call, callbr and invoke instructions.
4635Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4636 bool Changed = annotateAnyAllocSite(Call, &TLI);
4637
4638 // Mark any parameters that are known to be non-null with the nonnull
4639 // attribute. This is helpful for inlining calls to functions with null
4640 // checks on their arguments.
4641 SmallVector<unsigned, 4> ArgNos;
4642 unsigned ArgNo = 0;
4643
4644 for (Value *V : Call.args()) {
4645 if (V->getType()->isPointerTy()) {
4646 // Simplify the nonnull operand if the parameter is known to be nonnull.
4647 // Otherwise, try to infer nonnull for it.
4648 bool HasDereferenceable = Call.getParamDereferenceableBytes(ArgNo) > 0;
4649 if (Call.paramHasAttr(ArgNo, Attribute::NonNull) ||
4650 (HasDereferenceable &&
4651            !NullPointerIsDefined(Call.getFunction(),
4652                                  V->getType()->getPointerAddressSpace()))) {
4653 if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4654 replaceOperand(Call, ArgNo, Res);
4655 Changed = true;
4656 }
4657 } else if (isKnownNonZero(V,
4658 getSimplifyQuery().getWithInstruction(&Call))) {
4659 ArgNos.push_back(ArgNo);
4660 }
4661 }
4662 ArgNo++;
4663 }
4664
4665 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4666
4667 if (!ArgNos.empty()) {
4668 AttributeList AS = Call.getAttributes();
4669 LLVMContext &Ctx = Call.getContext();
4670 AS = AS.addParamAttribute(Ctx, ArgNos,
4671 Attribute::get(Ctx, Attribute::NonNull));
4672 Call.setAttributes(AS);
4673 Changed = true;
4674 }
4675
4676 // If the callee is a pointer to a function, attempt to move any casts to the
4677 // arguments of the call/callbr/invoke.
4678   Value *Callee = Call.getCalledOperand();
4679   Function *CalleeF = dyn_cast<Function>(Callee);
4680 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
4681 transformConstExprCastCall(Call))
4682 return nullptr;
4683
4684 if (CalleeF) {
4685 // Remove the convergent attr on calls when the callee is not convergent.
4686 if (Call.isConvergent() && !CalleeF->isConvergent() &&
4687 !CalleeF->isIntrinsic()) {
4688 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4689 << "\n");
4690       Call.setNotConvergent();
4691       return &Call;
4692 }
4693
4694 // If the call and callee calling conventions don't match, and neither one
4695 // of the calling conventions is compatible with C calling convention
4696 // this call must be unreachable, as the call is undefined.
4697 if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4698 !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
4699            TargetLibraryInfoImpl::isCallingConvCCompatible(&Call)) &&
4700          !(Call.getCallingConv() == llvm::CallingConv::C &&
4701            TargetLibraryInfoImpl::isCallingConvCCompatible(CalleeF))) &&
4702         // Only do this for calls to a function with a body. A prototype may
4703 // not actually end up matching the implementation's calling conv for a
4704 // variety of reasons (e.g. it may be written in assembly).
4705 !CalleeF->isDeclaration()) {
4706 Instruction *OldCall = &Call;
4707       CreateNonTerminatorUnreachable(OldCall);
4708       // If OldCall does not return void then replaceInstUsesWith poison.
4709       // This allows ValueHandlers and custom metadata to adjust themselves.
4710 if (!OldCall->getType()->isVoidTy())
4711 replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
4712 if (isa<CallInst>(OldCall))
4713 return eraseInstFromFunction(*OldCall);
4714
4715       // We cannot remove an invoke or a callbr, because it would change the
4716 // CFG, just change the callee to a null pointer.
4717 cast<CallBase>(OldCall)->setCalledFunction(
4718 CalleeF->getFunctionType(),
4719 Constant::getNullValue(CalleeF->getType()));
4720 return nullptr;
4721 }
4722 }
4723
4724 // Calling a null function pointer is undefined if a null address isn't
4725 // dereferenceable.
4726 if ((isa<ConstantPointerNull>(Callee) &&
4727        !NullPointerIsDefined(Call.getFunction())) ||
4728       isa<UndefValue>(Callee)) {
4729 // If Call does not return void then replaceInstUsesWith poison.
4730     // This allows ValueHandlers and custom metadata to adjust themselves.
4731 if (!Call.getType()->isVoidTy())
4732       replaceInstUsesWith(Call, PoisonValue::get(Call.getType()));
4733 
4734 if (Call.isTerminator()) {
4735 // Can't remove an invoke or callbr because we cannot change the CFG.
4736 return nullptr;
4737 }
4738
4739 // This instruction is not reachable, just remove it.
4740     CreateNonTerminatorUnreachable(&Call);
4741     return eraseInstFromFunction(Call);
4742   }
4743
4744 if (IntrinsicInst *II = findInitTrampoline(Callee))
4745 return transformCallThroughTrampoline(Call, *II);
4746
4747 // Combine calls involving pointer authentication intrinsics.
4748 if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
4749 return NewCall;
4750
4751 // Combine calls to ptrauth constants.
4752 if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
4753 return NewCall;
4754
4755 if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
4756 InlineAsm *IA = cast<InlineAsm>(Callee);
4757 if (!IA->canThrow()) {
4758 // Normal inline asm calls cannot throw - mark them
4759 // 'nounwind'.
4760       Call.setDoesNotThrow();
4761       Changed = true;
4762 }
4763 }
4764
4765 // Try to optimize the call if possible, we require DataLayout for most of
4766 // this. None of these calls are seen as possibly dead so go ahead and
4767 // delete the instruction now.
4768 if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
4769 Instruction *I = tryOptimizeCall(CI);
4770     // If we changed something, return the result; otherwise fall through
4771     // to the checks below.
4772 if (I) return eraseInstFromFunction(*I);
4773 }
4774
4775 if (!Call.use_empty() && !Call.isMustTailCall())
4776 if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
4777 Type *CallTy = Call.getType();
4778 Type *RetArgTy = ReturnedArg->getType();
4779 if (RetArgTy->canLosslesslyBitCastTo(CallTy))
4780 return replaceInstUsesWith(
4781 Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
4782 }
4783
4784 // Drop unnecessary callee_type metadata from calls that were converted
4785 // into direct calls.
4786 if (Call.getMetadata(LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
4787 Call.setMetadata(LLVMContext::MD_callee_type, nullptr);
4788 Changed = true;
4789 }
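// Illustrative sketch (hypothetical IR): an indirect call such as
//   call void %fp(i64 %x), !callee_type !0
// carries !callee_type so the type of the eventual callee can be checked;
// once earlier folds have resolved %fp to a known direct callee, the
// metadata serves no purpose and the block above drops it.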
4790
4791 // Drop unnecessary kcfi operand bundles from calls that were converted
4792 // into direct calls.
4793 auto Bundle = Call.getOperandBundle(LLVMContext::OB_kcfi);
4794 if (Bundle && !Call.isIndirectCall()) {
4795 DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
4796 if (CalleeF) {
4797 ConstantInt *FunctionType = nullptr;
4798 ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
4799
4800 if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
4801 FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
4802
4803 if (FunctionType &&
4804 FunctionType->getZExtValue() != ExpectedType->getZExtValue())
4805 dbgs() << Call.getModule()->getName()
4806 << ": warning: kcfi: " << Call.getCaller()->getName()
4807 << ": call to " << CalleeF->getName()
4808 << " using a mismatching function pointer type\n";
4809 }
4810 });
4811
4812 return CallBase::removeOperandBundle(&Call, LLVMContext::OB_kcfi);
4813 }
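// Illustrative sketch (hypothetical IR): an indirect call such as
//   call void %fp() [ "kcfi"(i32 12345678) ]
// carries a kcfi bundle so a type check can be emitted before the indirect
// branch; if earlier folds turned this into a direct call, the check is
// unnecessary and the bundle is removed above.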
4814
4815 if (isRemovableAlloc(&Call, &TLI))
4816 return visitAllocSite(Call);
4817
4818 // Handle intrinsics which can be used in both call and invoke context.
4819 switch (Call.getIntrinsicID()) {
4820 case Intrinsic::experimental_gc_statepoint: {
4821 GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
4822 SmallPtrSet<Value *, 32> LiveGcValues;
4823 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4824 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4825
4826 // Remove the relocation if unused.
4827 if (GCR.use_empty()) {
4828 eraseInstFromFunction(GCR);
4829 continue;
4830 }
4831
4832 Value *DerivedPtr = GCR.getDerivedPtr();
4833 Value *BasePtr = GCR.getBasePtr();
4834
4835 // Undef is undef, even after relocation.
4836 if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
4837 replaceInstUsesWith(GCR, UndefValue::get(GCR.getType()));
4838 eraseInstFromFunction(GCR);
4839 continue;
4840 }
4841
4842 if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
4843 // The relocation of null will be null for most any collector.
4844 // TODO: provide a hook for this in GCStrategy. There might be some
4845 // weird collector this property does not hold for.
4846 if (isa<ConstantPointerNull>(DerivedPtr)) {
4847 // Use null-pointer of gc_relocate's type to replace it.
4848 replaceInstUsesWith(GCR, ConstantPointerNull::get(PT));
4849 eraseInstFromFunction(GCR);
4850 continue;
4851 }
4852
4853 // isKnownNonNull -> nonnull attribute
4854 if (!GCR.hasRetAttr(Attribute::NonNull) &&
4855 isKnownNonZero(DerivedPtr,
4856 getSimplifyQuery().getWithInstruction(&Call))) {
4857 GCR.addRetAttr(Attribute::NonNull);
4858 // We discovered a new fact, so re-check users.
4859 Worklist.pushUsersToWorkList(GCR);
4860 }
4861 }
4862
4863 // If we have two copies of the same pointer in the statepoint argument
4864 // list, canonicalize to one. This may let us common gc.relocates.
4865 if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
4866 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
4867 auto *OpIntTy = GCR.getOperand(2)->getType();
4868 GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
4869 }
4870
4871 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
4872 // Canonicalize on the type from the uses to the defs
4873
4874 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
4875 LiveGcValues.insert(BasePtr);
4876 LiveGcValues.insert(DerivedPtr);
4877 }
4878 std::optional<OperandBundleUse> Bundle =
4879 GCSP.getOperandBundle(LLVMContext::OB_gc_live);
4880 unsigned NumOfGCLives = LiveGcValues.size();
4881 if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
4882 break;
4883 // We can reduce the size of the gc live bundle.
4884 DenseMap<Value *, unsigned> Val2Idx;
4885 std::vector<Value *> NewLiveGc;
4886 for (Value *V : Bundle->Inputs) {
4887 auto [It, Inserted] = Val2Idx.try_emplace(V);
4888 if (!Inserted)
4889 continue;
4890 if (LiveGcValues.count(V)) {
4891 It->second = NewLiveGc.size();
4892 NewLiveGc.push_back(V);
4893 } else
4894 It->second = NumOfGCLives;
4895 }
4896 // Update all gc.relocates
4897 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4898 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4899 Value *BasePtr = GCR.getBasePtr();
4900 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
4901 "Missed live gc for base pointer");
4902 auto *OpIntTy1 = GCR.getOperand(1)->getType();
4903 GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
4904 Value *DerivedPtr = GCR.getDerivedPtr();
4905 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
4906 "Missed live gc for derived pointer");
4907 auto *OpIntTy2 = GCR.getOperand(2)->getType();
4908 GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
4909 }
4910 // Create new statepoint instruction.
4911 OperandBundleDef NewBundle("gc-live", std::move(NewLiveGc));
4912 return CallBase::Create(&Call, NewBundle);
4913 }
4914 default: { break; }
4915 }
4916
4917 return Changed ? &Call : nullptr;
4918}
4919
4920/// If the callee is a constexpr cast of a function, attempt to move the cast to
4921/// the arguments of the call/invoke.
4922/// CallBrInst is not supported.
4923bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
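// Illustrative sketch of this transform (hypothetical IR, assuming 64-bit
// pointers): given
//   define ptr @f(ptr %p)
// a call site with a mismatched function type
//   %r = call i64 @f(i64 %x)
// can be rewritten as
//   %p1 = inttoptr i64 %x to ptr
//   %c = call ptr @f(ptr %p1)
//   %r = ptrtoint ptr %c to i64
// because i64 <-> ptr is a bit/no-op pointer cast on such a target.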
4924 auto *Callee =
4925 dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
4926 if (!Callee)
4927 return false;
4928
4930 "CallBr's don't have a single point after a def to insert at");
4931
4932 // Don't perform the transform for declarations, which may not be fully
4933 // accurate. For example, void @foo() is commonly used as a placeholder for
4934 // unknown prototypes.
4935 if (Callee->isDeclaration())
4936 return false;
4937
4938 // If this is a call to a thunk function, don't remove the cast. Thunks are
4939 // used to transparently forward all incoming parameters and outgoing return
4940 // values, so it's important to leave the cast in place.
4941 if (Callee->hasFnAttribute("thunk"))
4942 return false;
4943
4944 // If this is a call to a naked function, the assembly might be
4945 // using an argument, or otherwise rely on the frame layout,
4946 // so the function prototype will mismatch.
4947 if (Callee->hasFnAttribute(Attribute::Naked))
4948 return false;
4949
4950 // If this is a musttail call, the callee's prototype must match the caller's
4951 // prototype with the exception of pointee types. The code below doesn't
4952 // implement that, so we can't do this transform.
4953 // TODO: Do the transform if it only requires adding pointer casts.
4954 if (Call.isMustTailCall())
4955 return false;
4956
4957 Instruction *Caller = &Call;
4958 const AttributeList &CallerPAL = Call.getAttributes();
4959
4960 // Okay, this is a cast from a function to a different type. Unless doing so
4961 // would cause a type conversion of one of our arguments, change this call to
4962 // be a direct call with arguments casted to the appropriate types.
4963 FunctionType *FT = Callee->getFunctionType();
4964 Type *OldRetTy = Caller->getType();
4965 Type *NewRetTy = FT->getReturnType();
4966
4967 // Check to see if we are changing the return type...
4968 if (OldRetTy != NewRetTy) {
4969
4970 if (NewRetTy->isStructTy())
4971 return false; // TODO: Handle multiple return values.
4972
4973 if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
4974 if (!Caller->use_empty())
4975 return false; // Cannot transform this return value.
4976 }
4977
4978 if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4979 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
4980 if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
4981 NewRetTy, CallerPAL.getRetAttrs())))
4982 return false; // Attribute not compatible with transformed value.
4983 }
4984
4985 // If the callbase is an invoke instruction, and the return value is
4986 // used by a PHI node in a successor, we cannot change the return type of
4987 // the call because there is no place to put the cast instruction (without
4988 // breaking the critical edge). Bail out in this case.
4989 if (!Caller->use_empty()) {
4990 BasicBlock *PhisNotSupportedBlock = nullptr;
4991 if (auto *II = dyn_cast<InvokeInst>(Caller))
4992 PhisNotSupportedBlock = II->getNormalDest();
4993 if (PhisNotSupportedBlock)
4994 for (User *U : Caller->users())
4995 if (PHINode *PN = dyn_cast<PHINode>(U))
4996 if (PN->getParent() == PhisNotSupportedBlock)
4997 return false;
4998 }
4999 }
5000
5001 unsigned NumActualArgs = Call.arg_size();
5002 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
5003
5004 // Prevent us turning:
5005 // declare void @takes_i32_inalloca(i32* inalloca)
5006 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
5007 //
5008 // into:
5009 // call void @takes_i32_inalloca(i32* null)
5010 //
5011 // Similarly, avoid folding away bitcasts of byval calls.
5012 if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
5013 Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
5014 return false;
5015
5016 auto AI = Call.arg_begin();
5017 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
5018 Type *ParamTy = FT->getParamType(i);
5019 Type *ActTy = (*AI)->getType();
5020
5021 if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
5022 return false; // Cannot transform this parameter value.
5023
5024 // Check if there are any incompatible attributes we cannot drop safely.
5025 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
5026 .overlaps(AttributeFuncs::typeIncompatible(
5027 ParamTy, CallerPAL.getParamAttrs(i),
5028 AttributeFuncs::ASK_UNSAFE_TO_DROP)))
5029 return false; // Attribute not compatible with transformed value.
5030
5031 if (Call.isInAllocaArgument(i) ||
5032 CallerPAL.hasParamAttr(i, Attribute::Preallocated))
5033 return false; // Cannot transform to and from inalloca/preallocated.
5034
5035 if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
5036 return false;
5037
5038 if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
5039 Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
5040 return false; // Cannot transform to or from byval.
5041 }
5042
5043 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
5044 !CallerPAL.isEmpty()) {
5045 // In this case we have more arguments than the new function type, but we
5046 // won't be dropping them. Check that these extra arguments have attributes
5047 // that are compatible with being a vararg call argument.
5048 unsigned SRetIdx;
5049 if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
5050 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
5051 return false;
5052 }
5053
5054 // Okay, we decided that this is a safe thing to do: go ahead and start
5055 // inserting cast instructions as necessary.
5056 SmallVector<Value *, 8> Args;
5057 SmallVector<AttributeSet, 8> ArgAttrs;
5058 Args.reserve(NumActualArgs);
5059 ArgAttrs.reserve(NumActualArgs);
5060
5061 // Get any return attributes.
5062 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5063
5064 // If the return value is not being used, the type may not be compatible
5065 // with the existing attributes. Wipe out any problematic attributes.
5066 RAttrs.remove(
5067 AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
5068
5069 LLVMContext &Ctx = Call.getContext();
5070 AI = Call.arg_begin();
5071 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
5072 Type *ParamTy = FT->getParamType(i);
5073
5074 Value *NewArg = *AI;
5075 if ((*AI)->getType() != ParamTy)
5076 NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
5077 Args.push_back(NewArg);
5078
5079 // Add any parameter attributes except the ones incompatible with the new
5080 // type. Note that we made sure all incompatible ones are safe to drop.
5081 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
5082 ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
5083 ArgAttrs.push_back(
5084 CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
5085 }
5086
5087 // If the function takes more arguments than the call was taking, add them
5088 // now.
5089 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
5090 Args.push_back(Constant::getNullValue(FT->getParamType(i)));
5091 ArgAttrs.push_back(AttributeSet());
5092 }
5093
5094 // If we are removing arguments to the function, emit an obnoxious warning.
5095 if (FT->getNumParams() < NumActualArgs) {
5096 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
5097 if (FT->isVarArg()) {
5098 // Add all of the arguments in their promoted form to the arg list.
5099 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
5100 Type *PTy = getPromotedType((*AI)->getType());
5101 Value *NewArg = *AI;
5102 if (PTy != (*AI)->getType()) {
5103 // Must promote to pass through va_arg area!
5104 Instruction::CastOps opcode =
5105 CastInst::getCastOpcode(*AI, false, PTy, false);
5106 NewArg = Builder.CreateCast(opcode, *AI, PTy);
5107 }
5108 Args.push_back(NewArg);
5109
5110 // Add any parameter attributes.
5111 ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
5112 }
5113 }
5114 }
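// Illustrative sketch (hypothetical IR): if the rewritten callee is
//   declare void @p(ptr, ...)
// and the original call passed an extra i16 argument in the variadic part,
// the loop above promotes it to i32 (getPromotedType), mirroring C's
// default argument promotions for va_arg.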
5115
5116 AttributeSet FnAttrs = CallerPAL.getFnAttrs();
5117
5118 if (NewRetTy->isVoidTy())
5119 Caller->setName(""); // Void type should not have a name.
5120
5121 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
5122 "missing argument attributes");
5123 AttributeList NewCallerPAL = AttributeList::get(
5124 Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
5125
5126 SmallVector<OperandBundleDef, 1> OpBundles;
5127 Call.getOperandBundlesAsDefs(OpBundles);
5128
5129 CallBase *NewCall;
5130 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
5131 NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
5132 II->getUnwindDest(), Args, OpBundles);
5133 } else {
5134 NewCall = Builder.CreateCall(Callee, Args, OpBundles);
5135 cast<CallInst>(NewCall)->setTailCallKind(
5136 cast<CallInst>(Caller)->getTailCallKind());
5137 }
5138 NewCall->takeName(Caller);
5139 NewCall->setCallingConv(Call.getCallingConv());
5140 NewCall->setAttributes(NewCallerPAL);
5141
5142 // Preserve prof metadata if any.
5143 NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
5144
5145 // Insert a cast of the return type as necessary.
5146 Instruction *NC = NewCall;
5147 Value *NV = NC;
5148 if (OldRetTy != NV->getType() && !Caller->use_empty()) {
5149 assert(!NV->getType()->isVoidTy());
5150 NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
5151 NC->setDebugLoc(Caller->getDebugLoc());
5152
5153 auto OptInsertPt = NewCall->getInsertionPointAfterDef();
5154 assert(OptInsertPt && "No place to insert cast");
5155 InsertNewInstBefore(NC, *OptInsertPt);
5156 Worklist.pushUsersToWorkList(*Caller);
5157 }
5158
5159 if (!Caller->use_empty())
5160 replaceInstUsesWith(*Caller, NV);
5161 else if (Caller->hasValueHandle()) {
5162 if (OldRetTy == NV->getType())
5163 ValueHandleBase::ValueIsRAUWd(Caller, NV);
5164 else
5165 // We cannot call ValueIsRAUWd with a different type, and the
5166 // actual tracked value will disappear.
5167 ValueHandleBase::ValueIsDeleted(Caller);
5168 }
5169
5170 eraseInstFromFunction(*Caller);
5171 return true;
5172}
5173
5174/// Turn a call to a function created by init_trampoline / adjust_trampoline
5175/// intrinsic pair into a direct call to the underlying function.
5176 Instruction *
5177 InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
5178 IntrinsicInst &Tramp) {
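// Illustrative sketch (hypothetical IR) of the pattern handled here:
//   call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %chain)
//   %fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
//   %r = call i32 %fp(i32 %x)
// where @f is 'define i32 @f(ptr nest %env, i32 %x)'. The indirect call is
// rewritten into a direct call with the chain value spliced into the 'nest'
// parameter slot:
//   %r = call i32 @f(ptr nest %chain, i32 %x)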
5179 FunctionType *FTy = Call.getFunctionType();
5180 AttributeList Attrs = Call.getAttributes();
5181
5182 // If the call already has the 'nest' attribute somewhere then give up -
5183 // otherwise 'nest' would occur twice after splicing in the chain.
5184 if (Attrs.hasAttrSomewhere(Attribute::Nest))
5185 return nullptr;
5186
5187 Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
5188 FunctionType *NestFTy = NestF->getFunctionType();
5189
5190 AttributeList NestAttrs = NestF->getAttributes();
5191 if (!NestAttrs.isEmpty()) {
5192 unsigned NestArgNo = 0;
5193 Type *NestTy = nullptr;
5194 AttributeSet NestAttr;
5195
5196 // Look for a parameter marked with the 'nest' attribute.
5197 for (FunctionType::param_iterator I = NestFTy->param_begin(),
5198 E = NestFTy->param_end();
5199 I != E; ++NestArgNo, ++I) {
5200 AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
5201 if (AS.hasAttribute(Attribute::Nest)) {
5202 // Record the parameter type and any other attributes.
5203 NestTy = *I;
5204 NestAttr = AS;
5205 break;
5206 }
5207 }
5208
5209 if (NestTy) {
5210 std::vector<Value*> NewArgs;
5211 std::vector<AttributeSet> NewArgAttrs;
5212 NewArgs.reserve(Call.arg_size() + 1);
5213 NewArgAttrs.reserve(Call.arg_size());
5214
5215 // Insert the nest argument into the call argument list, which may
5216 // mean appending it. Likewise for attributes.
5217
5218 {
5219 unsigned ArgNo = 0;
5220 auto I = Call.arg_begin(), E = Call.arg_end();
5221 do {
5222 if (ArgNo == NestArgNo) {
5223 // Add the chain argument and attributes.
5224 Value *NestVal = Tramp.getArgOperand(2);
5225 if (NestVal->getType() != NestTy)
5226 NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
5227 NewArgs.push_back(NestVal);
5228 NewArgAttrs.push_back(NestAttr);
5229 }
5230
5231 if (I == E)
5232 break;
5233
5234 // Add the original argument and attributes.
5235 NewArgs.push_back(*I);
5236 NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
5237
5238 ++ArgNo;
5239 ++I;
5240 } while (true);
5241 }
5242
5243 // The trampoline may have been bitcast to a bogus type (FTy).
5244 // Handle this by synthesizing a new function type, equal to FTy
5245 // with the chain parameter inserted.
5246
5247 std::vector<Type*> NewTypes;
5248 NewTypes.reserve(FTy->getNumParams()+1);
5249
5250 // Insert the chain's type into the list of parameter types, which may
5251 // mean appending it.
5252 {
5253 unsigned ArgNo = 0;
5254 FunctionType::param_iterator I = FTy->param_begin(),
5255 E = FTy->param_end();
5256
5257 do {
5258 if (ArgNo == NestArgNo)
5259 // Add the chain's type.
5260 NewTypes.push_back(NestTy);
5261
5262 if (I == E)
5263 break;
5264
5265 // Add the original type.
5266 NewTypes.push_back(*I);
5267
5268 ++ArgNo;
5269 ++I;
5270 } while (true);
5271 }
5272
5273 // Replace the trampoline call with a direct call. Let the generic
5274 // code sort out any function type mismatches.
5275 FunctionType *NewFTy =
5276 FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
5277 AttributeList NewPAL =
5278 AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
5279 Attrs.getRetAttrs(), NewArgAttrs);
5280
5281 SmallVector<OperandBundleDef, 1> OpBundles;
5282 Call.getOperandBundlesAsDefs(OpBundles);
5283
5284 Instruction *NewCaller;
5285 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
5286 NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
5287 II->getUnwindDest(), NewArgs, OpBundles);
5288 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
5289 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
5290 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
5291 NewCaller =
5292 CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
5293 CBI->getIndirectDests(), NewArgs, OpBundles);
5294 cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
5295 cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
5296 } else {
5297 NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
5298 cast<CallInst>(NewCaller)->setTailCallKind(
5299 cast<CallInst>(Call).getTailCallKind());
5300 cast<CallInst>(NewCaller)->setCallingConv(
5301 cast<CallInst>(Call).getCallingConv());
5302 cast<CallInst>(NewCaller)->setAttributes(NewPAL);
5303 }
5304 NewCaller->setDebugLoc(Call.getDebugLoc());
5305
5306 return NewCaller;
5307 }
5308 }
5309
5310 // Replace the trampoline call with a direct call. Since there is no 'nest'
5311 // parameter, there is no need to adjust the argument list. Let the generic
5312 // code sort out any function type mismatches.
5313 Call.setCalledFunction(FTy, NestF);
5314 return &Call;
5315}
Definition Value.cpp:708
bool use_empty() const
Definition Value.h:346
static constexpr unsigned MaxAlignmentExponent
The maximum alignment for instructions.
Definition Value.h:829
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
OverflowingBinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWNeg(const ValTy &V)
Matches a 'Neg' as 'sub nsw 0, V'.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
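For orientation only, and not part of the file documented here: a minimal sketch of how these PatternMatch combinators compose, assuming a Value *V to inspect; the helper name matchesNegatedFAbs is invented for the example.
  #include "llvm/IR/PatternMatch.h"
  // Sketch: recognize `fneg (fabs X)` and bind X on success. Also matches the
  // `fsub -0.0, (fabs X)` spelling, since m_FNeg covers both forms.
  static bool matchesNegatedFAbs(llvm::Value *V, llvm::Value *&X) {
    using namespace llvm::PatternMatch;
    return match(V, m_FNeg(m_FAbs(m_Value(X))));
  }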
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cstfp_pred_ty< is_neg_zero_fp > m_NegZeroFP()
Match a floating-point negative zero.
specific_fpval m_SpecificFP(double V)
Match a specific floating point value or vector with all elements equal to the value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_strictlypositive > m_StrictlyPositive()
Match an integer or vector of strictly positive values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match an integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
cst_pred_ty< custom_checkfn< APInt > > m_CheckedInt(function_ref< bool(const APInt &)> CheckFn)
Match an integer or vector where CheckFn(ele) for each element is true.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty, true >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty, true > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty, true >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty, true > > > m_c_MaxOrMin(const LHS &L, const RHS &R)
class_match< UnaryOperator > m_UnOp()
Match an arbitrary unary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWSub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Exact_match< T > m_Exact(const T &SubPattern)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > > > m_MaxOrMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_CopySign(const Opnd0 &Op0, const Opnd1 &Op1)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:195
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
constexpr double e
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
@ NeverOverflows
Never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI Value * simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for an FMul, fold the result or return null.
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
LLVM_ABI RetainedKnowledge simplifyRetainedKnowledge(AssumeInst *Assume, RetainedKnowledge RK, AssumptionCache *AC, DominatorTree *DT)
canonicalize the RetainedKnowledge RK.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
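A hedged sketch of this helper in use, assuming ObjSize is the @llvm.objectsize call being visited and DL/TLI are the usual DataLayout and TargetLibraryInfo; this illustrates the API only and is not quoted from the file.
  #include "llvm/Analysis/MemoryBuiltins.h"
  // Sketch: fold @llvm.objectsize when a definite answer is computable.
  if (llvm::Value *Folded =
          llvm::lowerObjectSizeCall(ObjSize, DL, TLI, /*MustSucceed=*/false)) {
    ObjSize->replaceAllUsesWith(Folded); // users now see the computed size
    ObjSize->eraseFromParent();
  }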
LLVM_ABI Value * getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI)
Gets the alignment argument for an aligned_alloc-like function, using either built-in knowledge based...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI RetainedKnowledge getKnowledgeFromOperandInAssume(AssumeInst &Assume, unsigned Idx)
Retrieve the information held by Assume on the operand at index Idx.
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1706
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool isAssumeWithEmptyBundle(const AssumeInst &Assume)
Return true iff the operand bundles of the provided llvm.assume don't contain any valuable informat...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
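A hedged sketch of getSplatValue, assuming VecOp is some vector-typed Value *.
  #include "llvm/Analysis/VectorUtils.h"
  // Sketch: peel a splat vector back to its scalar, when there is one.
  if (llvm::Value *Scalar = llvm::getSplatValue(VecOp)) {
    // Every lane of VecOp is Scalar, so a scalar rewrite may apply.
  }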
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
LLVM_ABI RetainedKnowledge getKnowledgeFromBundle(AssumeInst &Assume, const CallBase::BundleOpInfo &BOI)
This extracts the Knowledge from an element of an operand bundle.
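A hedged sketch of the llvm.assume bundle queries above, assuming Assume (an AssumeInst &), BOI (one of its bundle descriptors), CtxI, AC and DT are already in scope; header choices are best-effort and not quoted from this file.
  #include "llvm/Analysis/AssumeBundleQueries.h"
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
  // Sketch: pull one piece of knowledge out of an assume operand bundle and
  // check that it can be used at the context instruction CtxI.
  llvm::RetainedKnowledge RK = llvm::getKnowledgeFromBundle(Assume, BOI);
  RK = llvm::simplifyRetainedKnowledge(&Assume, RK, &AC, &DT);
  if (RK.AttrKind == llvm::Attribute::Alignment &&
      llvm::isValidAssumeForContext(&Assume, CtxI, &DT)) {
    // RK.WasOn is known to be aligned to RK.ArgValue bytes at CtxI.
  }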
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1661
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_NABS
Negated absolute value.
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1606
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
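A hedged sketch of a typical computeKnownBits query, reusing the signature shown above and assuming V, DL, AC, CxtI and DT are in scope as they are throughout InstCombine.
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/Support/KnownBits.h"
  // Sketch: ask which bits of V are known and act on the answer.
  llvm::KnownBits Known(V->getType()->getScalarSizeInBits());
  llvm::computeKnownBits(V, Known, DL, &AC, CxtI, &DT);
  if (Known.isNonNegative() && Known.countMinTrailingZeros() >= 2) {
    // V is provably a non-negative multiple of 4.
  }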
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
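A hedged sketch of matchSelectPattern in use, assuming a Value *V to classify; SPF_ABS is the flavor listed earlier in this index.
  #include "llvm/Analysis/ValueTracking.h"
  // Sketch: recognize an abs() idiom written as a select over a negation.
  llvm::Value *LHS, *RHS;
  llvm::SelectPatternResult SPR = llvm::matchSelectPattern(V, LHS, RHS);
  if (SPR.Flavor == llvm::SPF_ABS) {
    // V computes an absolute value; LHS and RHS are the value and its negation.
  }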
LLVM_ABI bool matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I, PHINode *&P, Value *&Init, Value *&OtherOp)
Attempt to match a simple value-accumulating recurrence of the form: llvm.intrinsic....
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1775
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
LLVM_ABI AssumeInst * buildAssumeFromKnowledge(ArrayRef< RetainedKnowledge > Knowledge, Instruction *CtxI, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Build and return a new assume created from the provided knowledge if the knowledge in the assume is f...
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isNotCrossLaneOperation(const Instruction *I)
Return true if the instruction doesn't potentially cross vector lanes.
LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
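A hedged sketch of the usual mask triage for a masked intrinsic, assuming Mask is its <N x i1> mask operand; the header choice is best-effort.
  #include "llvm/Analysis/VectorUtils.h"
  // Sketch: classify the mask before deciding how to simplify the intrinsic.
  if (llvm::maskIsAllOneOrUndef(Mask)) {
    // Every lane executes: behaves like the unmasked operation.
  } else if (llvm::maskIsAllZeroOrUndef(Mask)) {
    // No lane executes: the result is just the passthrough operand.
  } else {
    llvm::APInt Demanded = llvm::possiblyDemandedEltsInMask(Mask);
    // Only the set bits of Demanded can correspond to live lanes.
  }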
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
LLVM_ABI Value * simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for the multiplication of an FMA, fold the result or return null.
@ Other
Any other memory.
Definition ModRef.h:68
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
LLVM_ABI Value * simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q)
Given a constrained FP intrinsic call, tries to compute its simplified version.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1642
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
@ Add
Sum of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI ConstantRange computeConstantRangeIncludingKnownBits(const WithCache< const Value * > &V, bool ForSigned, const SimplifyQuery &SQ)
Combine constant ranges from computeConstantRange() and computeKnownBits().
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Like isSafeToSpeculativelyExecute, but without using information from the instruction's non-constant operands, so the answer stays valid if those operands are replaced.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
constexpr unsigned BitWidth
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:249
LLVM_ABI bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
LLVM_ABI std::optional< APInt > getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref< const Value *(const Value *)> Mapper=[](const Value *V) { return V;})
Return the size of the requested allocation.
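A hedged sketch of the allocation-related queries listed in this index, assuming CB is a CallBase * and TLI a TargetLibraryInfo *.
  #include "llvm/Analysis/MemoryBuiltins.h"
  #include <optional>
  // Sketch: interrogate a call that might allocate or free memory.
  if (llvm::isRemovableAlloc(CB, TLI)) {
    if (std::optional<llvm::APInt> Size = llvm::getAllocSize(CB, TLI)) {
      // *Size is the provable allocation size in bytes.
    }
  }
  if (llvm::Value *Freed = llvm::getFreedOperand(CB, TLI)) {
    // CB frees the object that Freed points to.
  }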
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if any of the elements of this predicate mask are known to be ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1679
LLVM_ABI bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW=false, bool AllowPoison=true)
Return true if the two given values are negations of each other.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI std::optional< bool > computeKnownFPSignBit(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return false if we can prove that the specified FP value's sign bit is 0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define NC
Definition regutils.h:42
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ IEEE
IEEE-754 denormal numbers preserved.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:255
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:287
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:302
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
bool isNonZero() const
Returns true if this value is known to be non-zero.
Definition KnownBits.h:111
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:261
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:293
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:299
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:83
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
Matching combinators.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
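A hedged sketch of Align versus MaybeAlign, assuming CB is a CallBase * and ArgNo an argument index whose parameter alignment may be unspecified.
  #include "llvm/IR/InstrTypes.h"
  #include "llvm/Support/Alignment.h"
  // Sketch: MaybeAlign models an alignment that may be absent; valueOrOne
  // turns it into a conservative Align that is safe to compute with.
  llvm::MaybeAlign ParamAlign = CB->getParamAlign(ArgNo);
  llvm::Align Effective = ParamAlign.valueOrOne(); // 1 if unspecified
  if (Effective >= llvm::Align(16)) {
    // At least 16-byte alignment is guaranteed for this argument.
  }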
A lightweight accessor for an operand bundle meant to be passed around by value.
StringRef getTagName() const
Return the tag of this operand bundle as a string.
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
ArrayRef< Use > Inputs
Represents one piece of information held inside an operand bundle of an llvm.assume.
Attribute::AttrKind AttrKind
SelectPatternFlavor Flavor
const DataLayout & DL
const Instruction * CxtI
SimplifyQuery getWithInstruction(const Instruction *I) const